diff options
| author | Mohit Agrawal <moagrawa@redhat.com> | 2017-05-25 21:43:42 +0530 | 
|---|---|---|
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-05-31 20:43:53 +0000 | 
| commit | dba55ae364a2772904bb68a6bd0ea87289ee1470 (patch) | |
| tree | e8a7cf51bd45464cd26f9c4270787ffc50228854 | |
| parent | de92c363c95d16966dbcc9d8763fd4448dd84d13 (diff) | |
glusterfs: Not able to mount running volume after enable brick mux and stopped any volume
Problem: After enabling brick mux, if any volume goes down and we then try to mount
         a running volume, the mount command hangs.
Solution: With brick mux enabled, the server shares one data structure, server_conf,
          for all associated subvolumes. When any subvolume goes down in some
          ungraceful manner (e.g. its brick directory is removed), the posix xlator sends
          a GF_EVENT_CHILD_DOWN event to its parent xlators, and the server notify
          sets child_up to false in server_conf. When a client then tries
          to communicate with the server through mount, it checks conf->child_up,
          finds it FALSE, and throws the message "translator are not yet ready".
          This patch updates the server_conf structure to save the child_up status
          per xlator. Another important correction in this patch is to
          clean up the threads of the server-side xlators after the volume is stopped.
BUG: 1453977
Change-Id: Ic54da3f01881b7c9429ce92cc569236eb1d43e0d
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://review.gluster.org/17356
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
20 files changed, 335 insertions, 98 deletions
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index 8ede110121b..c17bf3bb6fc 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -198,10 +198,11 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)  {          gd1_mgmt_brick_op_req   xlator_req      = {0,};          ssize_t                 ret; -        xlator_t                *top = NULL; -        xlator_t                *victim = NULL; -        glusterfs_ctx_t         *ctx    = NULL; -        xlator_list_t           **trav_p; +        glusterfs_ctx_t         *ctx            = NULL; +        xlator_t                *top            = NULL; +        xlator_t                *victim         = NULL; +        xlator_list_t           **trav_p        = NULL; +        gf_boolean_t            lockflag        = _gf_false;          ret = xdr_to_generic (req->msg[0], &xlator_req,                                (xdrproc_t)xdr_gd1_mgmt_brick_op_req); @@ -214,57 +215,54 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)          LOCK (&ctx->volfile_lock);          {                  /* Find the xlator_list_t that points to our victim. 
*/ -                top = glusterfsd_ctx->active->first; -                for (trav_p = &top->children; *trav_p; -                     trav_p = &(*trav_p)->next) { -                        victim = (*trav_p)->xlator; -                        if (strcmp (victim->name, xlator_req.name) == 0) { -                                break; +                if (glusterfsd_ctx->active) { +                        top = glusterfsd_ctx->active->first; +                        for (trav_p = &top->children; *trav_p; +                                                    trav_p = &(*trav_p)->next) { +                                victim = (*trav_p)->xlator; +                                if (strcmp (victim->name, xlator_req.name) == 0) { +                                        break; +                                }                          }                  } - -                if (!*trav_p) { -                        gf_log (THIS->name, GF_LOG_ERROR, -                                "can't terminate %s - not found", -                                xlator_req.name); -                        /* -                         * Used to be -ENOENT.  However, the caller asked us to -                         * make sure it's down and if it's already down that's -                         * good enough. -                         */ -                        glusterfs_terminate_response_send (req, 0); -                        goto err; -                } - +        } +        if (!*trav_p) { +                gf_log (THIS->name, GF_LOG_ERROR, +                        "can't terminate %s - not found", +                          xlator_req.name); +                /* +                 * Used to be -ENOENT.  However, the caller asked us to +                 * make sure it's down and if it's already down that's +                 * good enough. 
+                 */                  glusterfs_terminate_response_send (req, 0); -                if ((trav_p == &top->children) && !(*trav_p)->next) { -                        gf_log (THIS->name, GF_LOG_INFO, -                                "terminating after loss of last child %s", -                                xlator_req.name); -                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx, -                                                     xlator_req.name); -                        kill (getpid(), SIGTERM); -                } else { -                        /* -                         * This is terribly unsafe without quiescing or shutting -                         * things down properly but it gets us to the point -                         * where we can test other stuff. -                         * -                         * TBD: finish implementing this "detach" code properly -                         */ -                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only" -                                " child %s", xlator_req.name); -                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim); -                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx, -                                                     xlator_req.name); - -                        *trav_p = (*trav_p)->next; -                        glusterfs_autoscale_threads (THIS->ctx, -1); -                } +                goto err; +        } +        glusterfs_terminate_response_send (req, 0); +        if ((trav_p == &top->children) && !(*trav_p)->next) { +                gf_log (THIS->name, GF_LOG_INFO, +                        "terminating after loss of last child %s", +                        xlator_req.name); +                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name); +                kill (getpid(), SIGTERM); +        } else { +                /* +                 * This is terribly unsafe without quiescing or shutting 
+                 * things down properly but it gets us to the point +                 * where we can test other stuff. +                 * +                 * TBD: finish implementing this "detach" code properly +                 */ +                UNLOCK (&ctx->volfile_lock); +                lockflag = _gf_true; +                gf_log (THIS->name, GF_LOG_INFO, "detaching not-only" +                         " child %s", xlator_req.name); +                top->notify (top, GF_EVENT_CLEANUP, victim);          }  err: -        UNLOCK (&ctx->volfile_lock); +        if (!lockflag) +                UNLOCK (&ctx->volfile_lock);          free (xlator_req.name);          xlator_req.name = NULL;          return 0; @@ -838,6 +836,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)          int32_t                 ret             = -1;          gd1_mgmt_brick_op_req   xlator_req      = {0,};          xlator_t                *this           = NULL; +        xlator_t                *nextchild      = NULL;          glusterfs_graph_t       *newgraph       = NULL;          glusterfs_ctx_t         *ctx            = NULL; @@ -862,15 +861,19 @@ glusterfs_handle_attach (rpcsvc_request_t *req)                          gf_log (this->name, GF_LOG_INFO,                                  "got attach for %s", xlator_req.name);                          ret = glusterfs_graph_attach (this->ctx->active, -                                                      xlator_req.name, -                                                      &newgraph); -                        if (ret == 0) { -                                ret = glusterfs_graph_parent_up (newgraph); +                                              xlator_req.name, &newgraph); +                        if (!ret && (newgraph && newgraph->first)) { +                                nextchild = newgraph->first; +                                ret = xlator_notify (nextchild, +                                                     
GF_EVENT_PARENT_UP, +                                                     nextchild);                                  if (ret) { -                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                        gf_msg (this->name, GF_LOG_ERROR, +                                                0,                                                  LG_MSG_EVENT_NOTIFY_FAILED,                                                  "Parent up notification " -                                                "failed"); +                                                "failed for %s ", +                                                nextchild->name);                                          goto out;                                  }                                  glusterfs_autoscale_threads (this->ctx, 1); diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c index 0889e07caa7..0ef14d5c68e 100644 --- a/libglusterfs/src/defaults-tmpl.c +++ b/libglusterfs/src/defaults-tmpl.c @@ -182,6 +182,17 @@ default_notify (xlator_t *this, int32_t event, void *data, ...)                  
}          }          break; +        case GF_EVENT_CLEANUP: +        { +                xlator_list_t *list = this->children; + +                while (list) { +                        xlator_notify (list->xlator, event, this); +                        list = list->next; +                } +        } +        break; +          default:          {                  xlator_list_t *parent = this->parents; diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x index 5b7fe001f1c..8a99ef5cfe7 100644 --- a/rpc/xdr/src/glusterfs-fops.x +++ b/rpc/xdr/src/glusterfs-fops.x @@ -78,7 +78,7 @@ enum glusterfs_fop_t {  };  /* Note: Removed event GF_EVENT_CHILD_MODIFIED=8, hence - *to preserve backward compatibiliy, GF_EVENT_TRANSPORT_CLEANUP = 9 + *to preserve backward compatibiliy, GF_EVENT_CLEANUP = 9   */  enum glusterfs_event_t {          GF_EVENT_PARENT_UP = 1, @@ -88,7 +88,7 @@ enum glusterfs_event_t {          GF_EVENT_CHILD_UP,          GF_EVENT_CHILD_DOWN,          GF_EVENT_CHILD_CONNECTING, -        GF_EVENT_TRANSPORT_CLEANUP = 9, +        GF_EVENT_CLEANUP = 9,          GF_EVENT_TRANSPORT_CONNECTED,          GF_EVENT_VOLFILE_MODIFIED,          GF_EVENT_GRAPH_NEW, diff --git a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t index 39ab2dd723c..e082ba12173 100644 --- a/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t +++ b/tests/bugs/glusterd/bug-1444596_brick_mux_posix_hlth_chk_status.t @@ -34,6 +34,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks $V1  EXPECT 1 count_brick_processes +TEST glusterfs -s $H0 --volfile-id $V1 $M0 +TEST touch $M0/file{1..10} +  pkill glusterd  TEST glusterd -LDEBUG  sleep 5 diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 895e7690a5e..b8a34422522 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ 
b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -10,6 +10,7 @@  #include <ctype.h>  #include <sys/uio.h> +#include <signal.h>  #include "glusterfs.h"  #include "xlator.h" @@ -212,6 +213,35 @@ out:          return ret;  } + +int +notify (xlator_t *this, int event, void *data, ...) +{ +        br_stub_private_t *priv = NULL; + +        if (!this) +                return 0; + +        priv = this->private; +        if (!priv) +                return 0; + +        switch (event) { +        case GF_EVENT_CLEANUP: +                if (priv->signth) { +                        (void) gf_thread_cleanup_xint (priv->signth); +                        priv->signth = 0; +                } +                if (priv->container.thread) { +                        (void) gf_thread_cleanup_xint (priv->container.thread); +                        priv->container.thread = 0; +                } +                break; +        } +        default_notify (this, event, data); +        return 0; +} +  void  fini (xlator_t *this)  { @@ -832,6 +862,7 @@ br_stub_signth (void *arg)          THIS = this;          while (1) { +                  pthread_mutex_lock (&priv->lock);                  {                          while (list_empty (&priv->squeue)) diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c index caa5bbbadcd..38e127b7eb5 100644 --- a/xlators/features/changelog/src/changelog-ev-handle.c +++ b/xlators/features/changelog/src/changelog-ev-handle.c @@ -342,6 +342,7 @@ changelog_ev_dispatch (void *data)          while (1) {                  /* TODO: change this to be pthread cond based.. 
later */ +                  tv.tv_sec = 1;                  tv.tv_usec = 0;                  select (0, NULL, NULL, NULL, &tv); @@ -363,6 +364,7 @@ changelog_ev_dispatch (void *data)                          gf_msg (this->name, GF_LOG_WARNING, 0,                                  CHANGELOG_MSG_PUT_BUFFER_FAILED,                                  "failed to put buffer after consumption"); +          }          return NULL; diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 697e8ca7b11..20af188d0d4 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -21,6 +21,7 @@  #include "changelog-messages.h"  #include <pthread.h> +#include <signal.h>  #include "changelog-rpc.h"  #include "errno.h" @@ -2098,6 +2099,7 @@ notify (xlator_t *this, int event, void *data, ...)          int                     ret             = 0;          int                     ret1            = 0;          struct list_head        queue           = {0, }; +        int                     i               = 0;          INIT_LIST_HEAD (&queue); @@ -2105,6 +2107,20 @@ notify (xlator_t *this, int event, void *data, ...)          
if (!priv)                  goto out; +        if (event == GF_EVENT_CLEANUP) { +                if (priv->connector) { +                        (void) gf_thread_cleanup_xint (priv->connector); +                        priv->connector = 0; +                } + +                for (; i < NR_DISPATCHERS; i++) { +                        if (priv->ev_dispatcher[i]) { +                                (void) gf_thread_cleanup_xint (priv->ev_dispatcher[i]); +                                priv->ev_dispatcher[i] = 0; +                        } +               } +        } +          if (event == GF_EVENT_TRANSLATOR_OP) {                  dict = data; diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c index 574b627a825..1811cec420f 100644 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ b/xlators/features/changetimerecorder/src/changetimerecorder.c @@ -2297,6 +2297,32 @@ out:          return 0;  } +int +notify (xlator_t *this, int event, void *data, ...) 
+{ + +       gf_ctr_private_t *priv = NULL; +       int               ret  = 0; + +       priv = this->private; + +       if (!priv) +               goto out; + +       if (event == GF_EVENT_CLEANUP) { +               if (fini_db (priv->_db_conn)) { +                       gf_msg (this->name, GF_LOG_WARNING, 0, +                                CTR_MSG_CLOSE_DB_CONN_FAILED, "Failed closing " +                                "db connection"); +               } +       } else  { +               ret = default_notify (this, event, data); +       } +out: +      return ret; + +} +  int32_t  mem_acct_init (xlator_t *this)  { diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 975d5f998bd..f0462dcb7eb 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -15,6 +15,7 @@  #include "common-utils.h"  #include "index-messages.h"  #include <ftw.h> +#include <signal.h>  #define XATTROP_SUBDIR "xattrop"  #define DIRTY_SUBDIR "dirty" @@ -2264,7 +2265,6 @@ init (xlator_t *this)          int ret = -1;          int64_t count = -1;          index_priv_t *priv = NULL; -        pthread_t thread;          pthread_attr_t  w_attr;          gf_boolean_t    mutex_inited = _gf_false;          gf_boolean_t    cond_inited  = _gf_false; @@ -2381,7 +2381,7 @@ init (xlator_t *this)          count = index_fetch_link_count (this, XATTROP);          index_set_link_count (priv, count, XATTROP); -        ret = gf_thread_create (&thread, &w_attr, index_worker, this); +        ret = gf_thread_create (&priv->thread, &w_attr, index_worker, this);          if (ret) {                  gf_msg (this->name, GF_LOG_WARNING, ret,                          INDEX_MSG_WORKER_THREAD_CREATE_FAILED, @@ -2494,6 +2494,24 @@ int  notify (xlator_t *this, int event, void *data, ...)  
{          int     ret = 0; +        index_priv_t *priv = NULL; + +        if (!this) +                return 0; + +        priv = this->private; +        if (!priv) +                return 0; + +        switch (event) { +        case GF_EVENT_CLEANUP: +                if (priv->thread) { +                        (void) gf_thread_cleanup_xint (priv->thread); +                        priv->thread = 0; +                } +                break; +        } +          ret = default_notify (this, event, data);          return ret;  } diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h index 5fb5a65cd8e..f622ceced5b 100644 --- a/xlators/features/index/src/index.h +++ b/xlators/features/index/src/index.h @@ -60,6 +60,7 @@ typedef struct index_priv {          dict_t  *pending_watchlist;          dict_t  *complete_watchlist;          int64_t  pending_count; +        pthread_t thread;  } index_priv_t;  typedef struct index_local { diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index ef20689b614..789bc6e9ed1 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1948,7 +1948,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)                  case GF_EVENT_POLLERR:                          break; -                case GF_EVENT_TRANSPORT_CLEANUP: +                case GF_EVENT_CLEANUP:                          break;                  default: diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 1247e41c99e..6e2d4c90e50 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -1069,7 +1069,8 @@ notify (xlator_t *this, int32_t event, void *data, ...)  
{          iot_conf_t *conf = this->private; -        if (GF_EVENT_PARENT_DOWN == event) +        if ((GF_EVENT_PARENT_DOWN == event) || +            (GF_EVENT_CLEANUP == event))                  iot_exit_threads (conf);          default_notify (this, event, data); diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am index d65e5b89957..72f811e97fb 100644 --- a/xlators/protocol/server/src/Makefile.am +++ b/xlators/protocol/server/src/Makefile.am @@ -20,7 +20,8 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \  	-DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \  	-I$(top_srcdir)/xlators/protocol/lib/src \  	-I$(top_srcdir)/rpc/rpc-lib/src \ -	-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src +	-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ +        -I$(top_srcdir)/glusterfsd/src  AM_CFLAGS = -Wall $(GF_CFLAGS) \              -DDATADIR=\"$(localstatedir)\" diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index f00804a3d3a..f8f8f99f123 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -413,6 +413,7 @@ server_setvolume (rpcsvc_request_t *req)          int32_t              fop_version   = 0;          int32_t              mgmt_version  = 0;          glusterfs_ctx_t     *ctx           = NULL; +        struct  _child_status *tmp         = NULL;          params = dict_new ();          reply  = dict_new (); @@ -512,13 +513,24 @@ server_setvolume (rpcsvc_request_t *req)                                        "initialised yet. 
Try again later");                  goto fail;          } - -        ret = dict_set_int32 (reply, "child_up", conf->child_up); -        if (ret < 0) +        list_for_each_entry (tmp, &conf->child_status->status_list, +                                                                  status_list) { +                if (strcmp (tmp->name, name) == 0) +                        break; +        } +        if (!tmp->name) {                  gf_msg (this->name, GF_LOG_ERROR, 0, -                        PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' " -                        "in the reply dict"); - +                        PS_MSG_CHILD_STATUS_FAILED, +                        "No xlator %s is found in " +                        "child status list", name); +        } else { +                ret = dict_set_int32 (reply, "child_up", tmp->child_up); +                if (ret < 0) +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                PS_MSG_DICT_GET_FAILED, +                                "Failed to set 'child_up' for xlator %s " +                                "in the reply dict", tmp->name); +        }          ret = dict_get_str (params, "process-uuid", &client_uid);          if (ret < 0) {                  ret = dict_set_str (reply, "ERROR", diff --git a/xlators/protocol/server/src/server-mem-types.h b/xlators/protocol/server/src/server-mem-types.h index 9165249d49a..76a78ac3ede 100644 --- a/xlators/protocol/server/src/server-mem-types.h +++ b/xlators/protocol/server/src/server-mem-types.h @@ -28,6 +28,7 @@ enum gf_server_mem_types_ {          gf_server_mt_setvolume_rsp_t,          gf_server_mt_lock_mig_t,          gf_server_mt_compound_rsp_t, +        gf_server_mt_child_status,          gf_server_mt_end,  };  #endif /* __SERVER_MEM_TYPES_H__ */ diff --git a/xlators/protocol/server/src/server-messages.h b/xlators/protocol/server/src/server-messages.h index b8245af18df..14729ade852 100644 --- 
a/xlators/protocol/server/src/server-messages.h +++ b/xlators/protocol/server/src/server-messages.h @@ -40,7 +40,7 @@   */  #define GLFS_PS_BASE                GLFS_MSGID_COMP_PS -#define GLFS_NUM_MESSAGES           91 +#define GLFS_NUM_MESSAGES           92  #define GLFS_MSGID_END              (GLFS_PS_BASE + GLFS_NUM_MESSAGES + 1)  /* Messages with message IDs */  #define glfs_msg_start_x GLFS_PS_BASE, "Invalid: Start of messages" @@ -857,6 +857,14 @@   */  #define PS_MSG_CLIENT_OPVERSION_GET_FAILED      (GLFS_PS_BASE + 91) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define PS_MSG_CHILD_STATUS_FAILED               (GLFS_PS_BASE + 92)  /*------------*/  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 202fe71ba55..130a55372a6 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -23,6 +23,7 @@  #include "event.h"  #include "events.h"  #include "server-messages.h" +#include "glusterfsd.h"  rpcsvc_cbk_program_t server_cbk_prog = {          .progname  = "Gluster Callback", @@ -1065,7 +1066,9 @@ init (xlator_t *this)          if (ret)                  conf->conf_dir = CONFDIR; -        conf->child_up = _gf_false; +        conf->child_status = GF_CALLOC (1, sizeof (struct _child_status), +                                          gf_server_mt_child_status); +        INIT_LIST_HEAD (&conf->child_status->status_list);          /*ret = dict_get_str (this->options, "statedump-path", &statedump_path);          if (!ret) { @@ -1389,10 +1392,15 @@ server_process_child_event (xlator_t *this, int32_t event, void *data,          pthread_mutex_lock (&conf->mutex);          {                  list_for_each_entry (xprt, &conf->xprt_list, list) { -                        rpcsvc_callback_submit (conf->rpc, xprt, -                                                &server_cbk_prog, -                            
                    cbk_procnum, -                                                NULL, 0, NULL); +                        if (!xprt->xl_private) { +                                continue; +                        } +                        if (xprt->xl_private->bound_xl == data) { +                                rpcsvc_callback_submit (conf->rpc, xprt, +                                                        &server_cbk_prog, +                                                        cbk_procnum, +                                                        NULL, 0, NULL); +                        }                  }          }          pthread_mutex_unlock (&conf->mutex); @@ -1409,10 +1417,19 @@ notify (xlator_t *this, int32_t event, void *data, ...)          server_conf_t    *conf        = NULL;          rpc_transport_t  *xprt        = NULL;          rpc_transport_t  *xp_next     = NULL; +        xlator_t         *victim      = NULL; +        xlator_t         *top         = NULL; +        xlator_t         *travxl      = NULL; +        xlator_list_t    **trav_p     = NULL; +        struct  _child_status *tmp    = NULL; +        gf_boolean_t     victim_found = _gf_false; +        glusterfs_ctx_t  *ctx         = NULL;          GF_VALIDATE_OR_GOTO (THIS->name, this, out);          conf = this->private;          GF_VALIDATE_OR_GOTO (this->name, conf, out); +        victim = data; +        ctx    = THIS->ctx;          switch (event) {          case GF_EVENT_UPCALL: @@ -1441,7 +1458,24 @@ notify (xlator_t *this, int32_t event, void *data, ...)          
case GF_EVENT_CHILD_UP:          { -                conf->child_up = _gf_true; +                list_for_each_entry (tmp, &conf->child_status->status_list, +                                                                 status_list) { +                        if (tmp->name == NULL) +                                break; +                        if (strcmp (tmp->name, victim->name) == 0) +                                break; +                } +                if (tmp->name) { +                        tmp->child_up = _gf_true; +                } else { +                        tmp  = GF_CALLOC (1, sizeof (struct _child_status), +                                          gf_server_mt_child_status); +                        INIT_LIST_HEAD (&tmp->status_list); +                        tmp->name  = gf_strdup (victim->name); +                        tmp->child_up = _gf_true; +                        list_add_tail (&tmp->status_list, +                                              &conf->child_status->status_list); +                }                  ret = server_process_child_event (this, event, data,                                                    GF_CBK_CHILD_UP);                  if (ret) { @@ -1450,14 +1484,25 @@ notify (xlator_t *this, int32_t event, void *data, ...)                                  
"server_process_child_event failed");                          goto out;                  } -                  default_notify (this, event, data);                  break;          }          case GF_EVENT_CHILD_DOWN:          { -                conf->child_up = _gf_false; +                list_for_each_entry (tmp, &conf->child_status->status_list, +                                                                  status_list) { +                        if (strcmp (tmp->name, victim->name) == 0) { +                                tmp->child_up = _gf_false; +                                break; +                        } +                } +                if (!tmp->name) +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                PS_MSG_CHILD_STATUS_FAILED, +                                "No xlator %s is found in " +                                "child status list", victim->name); +                  ret = server_process_child_event (this, event, data,                                                    GF_CBK_CHILD_DOWN);                  if (ret) { @@ -1466,13 +1511,12 @@ notify (xlator_t *this, int32_t event, void *data, ...)                                  "server_process_child_event failed");                          goto out;                  } -                  default_notify (this, event, data);                  break;          } -        case GF_EVENT_TRANSPORT_CLEANUP: +        case GF_EVENT_CLEANUP:                  conf = this->private;                  pthread_mutex_lock (&conf->mutex);                  /* @@ -1493,8 +1537,37 @@ notify (xlator_t *this, int32_t event, void *data, ...)                                  
rpc_transport_disconnect (xprt, _gf_false);                          }                  } +                list_for_each_entry (tmp, &conf->child_status->status_list, +                                                                 status_list) { +                        if (strcmp (tmp->name, victim->name) == 0) +                                break; +                } +                if (tmp->name && (strcmp (tmp->name, victim->name) == 0)) { +                        GF_FREE (tmp->name); +                        list_del (&tmp->status_list); +                }                  pthread_mutex_unlock (&conf->mutex); -                /* NB: do *not* propagate anywhere else */ +                if (this->ctx->active) { +                        top = this->ctx->active->first; +                        LOCK (&ctx->volfile_lock); +                                for (trav_p = &top->children; *trav_p; +                                                   trav_p = &(*trav_p)->next) { +                                        travxl = (*trav_p)->xlator; +                                        if (travxl && +                                                   strcmp (travxl->name, victim->name) == 0) { +                                                victim_found = _gf_true; +                                                break; +                                        } +                                } +                        UNLOCK (&ctx->volfile_lock); +                        if (victim_found) +                                (*trav_p) = (*trav_p)->next; +                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx, +                                                     victim->name); +                        glusterfs_autoscale_threads (THIS->ctx, -1); +                        default_notify (victim, GF_EVENT_CLEANUP, data); + +                }                  break;          default: diff --git a/xlators/protocol/server/src/server.h 
b/xlators/protocol/server/src/server.h index 0b37eb1414a..09a2d74ff2b 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -73,6 +73,12 @@ struct _volfile_ctx {          uint32_t             checksum;  }; +struct _child_status { +        struct list_head status_list; +        char *name; +        gf_boolean_t child_up; + +};  struct server_conf {          rpcsvc_t               *rpc;          struct rpcsvc_config    rpc_conf; @@ -101,9 +107,7 @@ struct server_conf {                                              * in case if volume set options                                              * (say *.allow | *.reject) are                                              * tweeked */ -        gf_boolean_t            child_up; /* Set to true, when child is up, and -                                           * false, when child is down */ - +        struct _child_status    *child_status;          gf_lock_t               itable_lock;  };  typedef struct server_conf server_conf_t; diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 1a49af47a8b..52be925aae2 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -1411,6 +1411,7 @@ posix_janitor_thread_proc (void *data)          THIS = this;          while (1) { +                  time (&now);                  if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {                          gf_msg_trace (this->name, 0, @@ -1844,9 +1845,11 @@ posix_health_check_thread_proc (void *data)          xlator_list_t       **trav_p             = NULL;          int                   count              = 0;          gf_boolean_t          victim_found       = _gf_false; +        glusterfs_ctx_t      *ctx                = NULL;          this = data;          priv = this->private; +        ctx  = THIS->ctx;          /* prevent races when the interval is updated */          interval = 
priv->health_check_interval; @@ -1896,10 +1899,12 @@ abort:          */          if (this->ctx->active) {                  top = this->ctx->active->first; -                for (trav_p = &top->children; *trav_p; +                LOCK (&ctx->volfile_lock); +                        for (trav_p = &top->children; *trav_p;                                                 trav_p = &(*trav_p)->next) { -                        count++; -                } +                                count++; +                        } +                UNLOCK (&ctx->volfile_lock);          }          if (count == 1) { @@ -1919,20 +1924,21 @@ abort:                          kill (getpid(), SIGKILL);          } else { -                for (trav_p = &top->children; *trav_p; -                     trav_p = &(*trav_p)->next) { -                        victim = (*trav_p)->xlator; -                        if (victim && -                            strcmp (victim->name, priv->base_path) == 0) { -                                victim_found = _gf_true; -                                break; +                LOCK (&ctx->volfile_lock); +                        for (trav_p = &top->children; *trav_p; +                             trav_p = &(*trav_p)->next) { +                                victim = (*trav_p)->xlator; +                                if (victim && +                                         strcmp (victim->name, priv->base_path) == 0) { +                                        victim_found = _gf_true; +                                        break; +                                }                          } -                } +                UNLOCK (&ctx->volfile_lock);                  if (victim_found) { -                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim); -                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx, -                                                     priv->base_path); -                        glusterfs_autoscale_threads 
(THIS->ctx, -1); +                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only " +                                " child %s", priv->base_path); +                        top->notify (top, GF_EVENT_CLEANUP, victim);                  }          } diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index b1a529bcfee..754a45aa6fb 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -6626,6 +6626,9 @@ notify (xlator_t *this,          void *data,          ...)  { +        struct posix_private *priv = NULL; + +        priv = this->private;          switch (event)          {          case GF_EVENT_PARENT_UP: @@ -6634,6 +6637,23 @@ notify (xlator_t *this,                  default_notify (this, GF_EVENT_CHILD_UP, data);          }          break; +        case GF_EVENT_CLEANUP: +                if (priv->health_check) { +                        pthread_cancel (priv->health_check); +                        priv->health_check = 0; +                } +                if (priv->janitor) { +                        (void) gf_thread_cleanup_xint (priv->janitor); +                        priv->janitor = 0; +                } +                if (priv->fsyncer) { +                        (void) gf_thread_cleanup_xint (priv->fsyncer); +                        priv->fsyncer = 0; +                } +                if (priv->mount_lock) +                        (void) sys_closedir (priv->mount_lock); + +        break;          default:                  /* */                  break;  | 
