glusterd: big lock - a coarse-grained locking to prevent races

There are primarily three lists in the glusterd process that are accessed concurrently, namely priv->volumes, priv->peers and volinfo->bricks_list.

Big-lock approach
-----------------
WHAT IS IT?
Big lock is a coarse-grained lock which protects all three lists, mentioned above, from racy access.

HOW DOES IT WORK?
At any given point in time, glusterd's thread(s) are in execution _iff_ there is a preceding, inbound network event. Of course, the sigwaiter thread and timer thread are exceptions.

A network event is an external trigger to glusterd, via the epoll thread, in the form of POLLIN and POLLERR.

As long as we take the big-lock at all such entry points and yield it when we are done, we are guaranteed that all the network events, accessing the global lists, are serialised.

This amounts to holding the big lock at
- all the handlers of all the actors in glusterd. (POLLIN)
- all the cbks in glusterd. (POLLIN)
- rpc_notify (DISCONNECT event), if we access/modify one of the three lists. (POLLERR)

In the case of synctask'ized volume operations, we must remember that, if we held the big lock for the entire duration of the handler, we may block other non-synctask rpc actors from executing. For example, volume-start would block in PMAP SIGNIN, if done incorrectly. To prevent this, we need to yield the big lock when we yield the synctask, and reacquire it when the synctask wakes up.

Change-Id: Ib929f9905b55fb6c3fc27fefb497a26dba058e4f
BUG: 948686
Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-on: http://review.gluster.org/4784
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
author: Krishnan Parthasarathi <kparthas@redhat.com> 2013-04-02 07:56:25 +0530
committer: Anand Avati <avati@redhat.com> 2013-04-12 13:47:46 -0700
commit: f34343d3751cd73e8eabe6d5544fb1f58b316595 (patch)
tree: 869aa908771b3708f4ad9a7a7ec57a623239b9a5 /xlators/mgmt/glusterd/src/glusterd-utils.c
parent: 732cd267c924554a638519cff0df146b2688d6e8 (diff)
1 files changed, 31 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 6c5d0e2f0..e2d46a6a6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -23,6 +23,7 @@
 #include "timer.h"
 #include "defaults.h"
 #include "compat.h"
+#include "syncop.h"
 #include "run.h"
 #include "compat-errno.h"
 #include "statedump.h"
@@ -1252,6 +1253,7 @@ glusterd_brick_connect (glusterd_volinfo_t  *volinfo,
         char                    socketpath[PATH_MAX] = {0};
         dict_t                  *options = NULL;
         struct rpc_clnt         *rpc = NULL;
+        glusterd_conf_t         *priv = THIS->private;
 
         GF_ASSERT (volinfo);
         GF_ASSERT (brickinfo);
@@ -1270,9 +1272,11 @@ glusterd_brick_connect (glusterd_volinfo_t  *volinfo,
                                                              socketpath, 600);
                 if (ret)
                         goto out;
+                synclock_unlock (&priv->big_lock);
                 ret = glusterd_rpc_create (&rpc, options,
                                            glusterd_brick_rpc_notify,
                                            brickinfo);
+                synclock_lock (&priv->big_lock);
                 if (ret)
                         goto out;
                 brickinfo->rpc = rpc;
@@ -1422,10 +1426,14 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,
                 runner_add_arg (&runner, "--mem-accounting");
 
         runner_log (&runner, "", GF_LOG_DEBUG, "Starting GlusterFS");
-        if (wait)
+        if (wait) {
+                synclock_unlock (&priv->big_lock);
                 ret = runner_run (&runner);
-        else
+                synclock_lock (&priv->big_lock);
+
+        } else {
                 ret = runner_run_nowait (&runner);
+        }
 
         if (ret)
                 goto out;
@@ -2403,6 +2411,19 @@ out:
         return in;
 }
 
+int        /* Restarts gsyncds and rebalance, and optionally bricks first; always returns 0. */
+glusterd_spawn_daemons (void *opaque)        /* opaque carries an integer flag, not a real pointer */
+{
+        glusterd_conf_t *conf = THIS->private;        /* glusterd config taken from the current xlator's private data */
+        gf_boolean_t    start_bricks = (long) opaque;        /* flag recovered from the pointer value via a long cast */
+
+        if (start_bricks)        /* bricks are restarted only when the caller passed a non-zero flag */
+                glusterd_restart_bricks (conf);
+        glusterd_restart_gsyncds (conf);        /* runs regardless of start_bricks */
+        glusterd_restart_rebalance (conf);        /* runs regardless of start_bricks */
+        return 0;        /* no result from the three restart calls is inspected here */
+}
+
 void
 glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
                                   gf_boolean_t meets_quorum)
@@ -3399,6 +3420,7 @@ glusterd_nodesvc_connect (char *server, char *socketpath) {
         int                     ret = 0;
         dict_t                  *options = NULL;
         struct rpc_clnt         *rpc = NULL;
+        glusterd_conf_t         *priv = THIS->private;
 
         rpc = glusterd_nodesvc_get_rpc (server);
 
@@ -3412,9 +3434,11 @@ glusterd_nodesvc_connect (char *server, char *socketpath) {
                                                              socketpath, 600);
                 if (ret)
                         goto out;
+                synclock_unlock (&priv->big_lock);
                 ret = glusterd_rpc_create (&rpc, options,
                                            glusterd_nodesvc_rpc_notify,
                                            server);
+                synclock_lock (&priv->big_lock);
                 if (ret)
                         goto out;
                 (void) glusterd_nodesvc_set_rpc (server, rpc);
@@ -4055,13 +4079,8 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                 if (volinfo->status != GLUSTERD_STATUS_STARTED)
                         continue;
                 start_nodesvcs = _gf_true;
-                if (glusterd_is_volume_in_server_quorum (volinfo)) {
-                        //these bricks will be restarted once the quorum is met
-                        continue;
-                }
-
                 list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
-                        glusterd_brick_start (volinfo, brickinfo, _gf_true);
+                        glusterd_brick_start (volinfo, brickinfo, _gf_false);
                 }
         }
 
@@ -5577,7 +5596,9 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
         runner_argprintf (&runner, ":%s", master_vol->volname);
         runner_add_args  (&runner, slave, "--config-set", "session-owner",
                           uuid_str, NULL);
+        synclock_unlock (&priv->big_lock);
         ret = runner_run (&runner);
+        synclock_lock (&priv->big_lock);
         if (ret == -1) {
                 errcode = -1;
                 goto out;
@@ -5588,7 +5609,9 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
         runner_argprintf (&runner, "%s/"GSYNC_CONF, priv->workdir);
         runner_argprintf (&runner, ":%s", master_vol->volname);
         runner_add_arg   (&runner, slave);
+        synclock_unlock (&priv->big_lock);
         ret = runner_run (&runner);
+        synclock_lock (&priv->big_lock);
         if (ret == -1) {
                 gf_asprintf (op_errstr, GEOREP" start failed for %s %s",
                              master_vol->volname, slave);
@@ -6051,7 +6074,6 @@ glusterd_restart_rebalance (glusterd_conf_t *conf)
         return ret;
 }
 
-
 void
 glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo)
 {
author	Krishnan Parthasarathi <kparthas@redhat.com>	2013-04-02 07:56:25 +0530
committer	Anand Avati <avati@redhat.com>	2013-04-12 13:47:46 -0700
commit	f34343d3751cd73e8eabe6d5544fb1f58b316595 (patch)
tree	869aa908771b3708f4ad9a7a7ec57a623239b9a5 /xlators/mgmt/glusterd/src/glusterd-utils.c
parent	732cd267c924554a638519cff0df146b2688d6e8 (diff)