author     Venkatesh Somyajulu <vsomyaju@redhat.com>    2012-10-08 16:18:50 +0530
committer  Anand Avati <avati@redhat.com>               2012-10-11 17:56:24 -0700
commit     72dafec18a79c0b66a18460725e0ade527939408 (patch)
tree       e4357a35b0cbc9cdbb2c23d00bdc0016a3d4c129 /xlators/mgmt/glusterd/src/glusterd-handler.c
parent     92754cd1d12cdda18b27911f3e50eabd907edce6 (diff)
cluster/afr: gluster volume heal $VN info sporadically reports nothing to heal
PROBLEM: When rsync is run on the mount point of a replicate volume and, during the rsync, glusterd and the bricks on one of the machines are killed, "gluster volume heal info" gives inconsistent results: sometimes it reports zero entries, other times a different output. Once glusterd and the brick on one machine are killed, RPC_CLNT_DISCONNECT is triggered periodically on the other machine. If such a disconnect notification arrives while "gluster volume heal info" is executing, glusterd treats it as a fresh disconnect and calls glusterd_op_ac_send_unlock_drain instead of glusterd_op_ac_rcvd_brick_op_acc, the function responsible for printing the entries. In other words, because of the RPC_CLNT_DISCONNECT notification, glusterd runs the disconnect handling (glusterd_op_ac_send_unlock_drain) rather than the handling for the "gluster volume heal info" command (glusterd_op_ac_rcvd_brick_op_acc).

FIX: Add a check in the RPC_CLNT_DISCONNECT case of glusterd_peer_rpc_notify so that the first disconnect for a peer marks that peer as disconnected. If RPC_CLNT_DISCONNECT is triggered again for a peer that is already marked as disconnected, glusterd does not inject EVENT_ACK_DRAIN and the number of entries is printed correctly.

Change-Id: I770e3175b050fd7c7beb801b611a2dff9bfcbac8
BUG: 857503
Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com>
Reviewed-on: http://review.gluster.org/4043
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
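For illustration, here is a minimal, self-contained sketch of the guard this patch introduces (peer_info_t, handle_first_disconnect() and the main() driver are hypothetical stand-ins for this sketch, not glusterd APIs): only the first RPC_CLNT_DISCONNECT notification for a peer runs the unlock/drain cleanup; later notifications for an already-disconnected peer are ignored.

/* Sketch only: illustrates the "act on the first disconnect only" guard.
 * peer_info_t, handle_first_disconnect() and main() are hypothetical;
 * the real patch applies the same check inside glusterd_peer_rpc_notify(). */
#include <stdio.h>

typedef struct {
        int connected;   /* 1 while the peer's RPC link is considered up */
} peer_info_t;

/* Stand-in for the expensive cleanup (cluster unlock, local lock flush). */
static void
handle_first_disconnect (peer_info_t *peer)
{
        (void) peer;
        printf ("first disconnect: inject unlock/drain events\n");
}

/* Called for every RPC_CLNT_DISCONNECT notification for this peer. */
static void
peer_disconnect_notify (peer_info_t *peer)
{
        if (peer->connected) {
                /* Only the first notification triggers the cleanup. */
                handle_first_disconnect (peer);
        } else {
                /* Periodic reconnect failures land here and are ignored,
                 * so an in-flight "volume heal info" brick op is not
                 * aborted by a spurious unlock-drain. */
                printf ("already marked disconnected: nothing to do\n");
        }

        /* Mark the peer down only after handling, mirroring the patch,
         * which moves "peerinfo->connected = 0" after the guarded block. */
        peer->connected = 0;
}

int
main (void)
{
        peer_info_t peer = { .connected = 1 };

        peer_disconnect_notify (&peer);  /* real disconnect: cleanup runs   */
        peer_disconnect_notify (&peer);  /* periodic retry: cleanup skipped */
        return 0;
}

The actual change, shown in the diff below, wraps the existing unlock/drain logic in glusterd_peer_rpc_notify () in an "if (peerinfo->connected)" block and clears peerinfo->connected only after that block runs.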
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-handler.c')
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-handler.c   53
1 file changed, 29 insertions, 24 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index cd395c49e73..ae4138ebe7a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -2892,41 +2892,46 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d",
peerinfo->state.state);
- peerinfo->connected = 0;
-
/*
local glusterd (thinks that it) is the owner of the cluster
lock and 'fails' the operation on the first disconnect from
a peer.
*/
- glusterd_get_lock_owner (&owner);
- if (!uuid_compare (conf->uuid, owner)) {
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_UNLOCK,
- NULL);
+
+ if (peerinfo->connected) {
+ glusterd_get_lock_owner (&owner);
+ if (!uuid_compare (conf->uuid, owner)) {
+ ret = glusterd_op_sm_inject_event
+ (GD_OP_EVENT_START_UNLOCK, NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to enqueue cluster "
+ "unlock event");
+ break;
+ }
+
+ peer_uuid = GF_CALLOC (1, sizeof (*peer_uuid),
+ gf_common_mt_char);
+ if (!peer_uuid) {
+ ret = -1;
+ break;
+ }
+
+ uuid_copy (*peer_uuid, peerinfo->uuid);
+ ret = glusterd_op_sm_inject_event
+ (GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP, peer_uuid);
if (ret)
gf_log (this->name, GF_LOG_ERROR, "Unable"
- " to enqueue cluster unlock event");
- break;
- }
-
- peer_uuid = GF_CALLOC (1, sizeof (*peer_uuid), gf_common_mt_char);
- if (!peer_uuid) {
- ret = -1;
- break;
- }
+ " to enque local lock flush event.");
- uuid_copy (*peer_uuid, peerinfo->uuid);
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP,
- peer_uuid);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Unable"
- " to enque local lock flush event.");
+ //Inject friend disconnected here
+ if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) {
+ glusterd_friend_remove_notify (peerctx);
+ }
- //Inject friend disconnected here
- if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) {
- glusterd_friend_remove_notify (peerctx);
}
+ peerinfo->connected = 0;
//default_notify (this, GF_EVENT_CHILD_DOWN, NULL);
break;
}