From d7b3e068290c41b13ecd664771814202d7d26881 Mon Sep 17 00:00:00 2001
From: Avra Sengupta
Date: Mon, 7 Apr 2014 05:25:28 +0000
Subject: glusterd/snapshot: Adding snap_vol_id and snap_uuid to missed_snap_list

Persisting missed snapshot info on disk as well as in memory in the
following format:

-------------NODE-UUID--------------:--------------SNAP-UUID-------------=---------SNAP-VOL-ID------------:BRICKNUM:-------BRICKPATH--------:OPERATION:STATUS

927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 1 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 3 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=83a3cc05453b46b2a7eda4c9a9208638: 3 :/brick/brick-dirs/brick3: 1 : 1

This data will be stored on disk at /var/lib/glusterd/snaps/missed_snaps_list.
In memory we maintain the data as a list of glusterd_missed_snap_info in
conf; the key for this list is formed by the first two fields, i.e.
NODE-UUID:SNAP-UUID. For every NODE-UUID:SNAP-UUID, multiple operations
can be missed on multiple bricks, so we maintain a list of
glusterd_snap_op_t for every glusterd_missed_snap_info node.

This list is maintained and updated during snapshot create, delete, and
restore operations, which are the only operations that, if missed, are
recorded in this list.

During snapshot create, if a node or a brick is down, we don't receive
its mount point information. The snap_status of such bricks is marked
as -1, and their brick details are added to this list.

During snapshot delete, we check from the originator node whether any
other nodes holding bricks of the said snap are down; those are added
to the list. Also, if a node is up but the snapshot was pending for a
snap brick (its snap_status is -1), we add that brick to the list too.
When a subsequent delete entry is processed for an already existing
create entry, we just mark the create entry's status as done (2) and
don't add the delete entry to the list.

During snapshot restore, we perform the same checks as for delete:
bricks on downed nodes and bricks with snap_status -1 are added to the
list, and when a subsequent restore entry is processed for an already
existing create entry, we just mark the create entry's status as done
(2) and don't add the restore entry to the list.
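A minimal sketch, in C, of the in-memory layout described above. It is
inferred from the fields this patch references (node_uuid, snap_uuid,
snap_vol_id, brick_num, brick_path, op, status); the authoritative
definitions live in glusterd's headers and may differ in member types,
naming, and list implementation.

    #include <stdint.h>

    /* Stand-in for gluster's list head; the real code uses its own list.h
     * (see the INIT_LIST_HEAD calls in the patch below). */
    struct list_head {
            struct list_head *next;
            struct list_head *prev;
    };

    /* One missed operation on one brick of a snap volume. */
    typedef struct glusterd_snap_op_ {
            char             *snap_vol_id;   /* SNAP-VOL-ID field            */
            int32_t           brick_num;     /* BRICKNUM field               */
            char             *brick_path;    /* BRICKPATH field              */
            int32_t           op;            /* OPERATION field              */
            int32_t           status;        /* STATUS field; 2 means done   */
            struct list_head  snap_ops_list; /* linkage into snap_ops below  */
    } glusterd_snap_op_t;

    /* Keyed by NODE-UUID:SNAP-UUID; owns the list of missed operations. */
    typedef struct glusterd_missed_snap_ {
            char             *node_uuid;
            char             *snap_uuid;
            struct list_head  missed_snaps;  /* linkage into conf's list     */
            struct list_head  snap_ops;      /* list of glusterd_snap_op_t   */
    } glusterd_missed_snap_info;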
Change-Id: I54f63e28d3c40555d0f84528f38227103171f594
BUG: 1061685
Signed-off-by: Avra Sengupta
Reviewed-on: http://review.gluster.org/7454
Tested-by: Gluster Build System
Reviewed-by: Rajesh Joseph
Reviewed-by: Vijay Bellur
---
 xlators/mgmt/glusterd/src/glusterd-utils.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')

diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 74317eb44..b4644432d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -709,7 +709,7 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict,
                         /* Adding missed delete to the dict */
                         ret = glusterd_add_missed_snaps_to_dict
                                                    (rsp_dict,
-                                                    snap_volinfo->volname,
+                                                    snap_volinfo,
                                                     brickinfo,
                                                     brick_count + 1,
                                                     GF_SNAP_OPTION_TYPE_RESTORE);
@@ -2693,8 +2693,10 @@
                                    snap_ops_list) {
                         snprintf (name_buf, sizeof(name_buf),
                                   "missed_snaps_%d", missed_snap_count);
-                        snprintf (value, sizeof(value), "%s=%d:%s:%d:%d",
-                                  missed_snapinfo->node_snap_info,
+                        snprintf (value, sizeof(value), "%s:%s=%s:%d:%s:%d:%d",
+                                  missed_snapinfo->node_uuid,
+                                  missed_snapinfo->snap_uuid,
+                                  snap_opinfo->snap_vol_id,
                                   snap_opinfo->brick_num,
                                   snap_opinfo->brick_path,
                                   snap_opinfo->op,
@@ -3937,7 +3939,11 @@ glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo,
                                                   old_brickinfo->hostname,
                                                   old_brickinfo->path,
                                                   new_volinfo, &new_brickinfo);
-                if (ret) {
+                /* If the brick is stale, i.e it's not a part of the new volume
+                 * or if it's part of the new volume and is pending a snap,
+                 * then stop the brick process
+                 */
+                if (ret || (new_brickinfo->snap_status == -1)) {
                         /*TODO: may need to switch to 'atomic' flavour of
                          * brick_stop, once we make peer rpc program also
                          * synctask enabled*/
@@ -4240,7 +4246,14 @@ glusterd_import_friend_missed_snap_list (dict_t *vols)
                 goto out;
         }

-        ret = glusterd_store_update_missed_snaps (vols, missed_snap_count);
+        ret = glusterd_add_missed_snaps_to_list (vols, missed_snap_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to add missed snaps to list");
+                goto out;
+        }
+
+        ret = glusterd_store_update_missed_snaps ();
         if (ret) {
                 gf_log (this->name, GF_LOG_ERROR,
                         "Failed to update missed_snaps_list");
@@ -10086,7 +10099,6 @@ glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo)
         if (!new_missed_snapinfo)
                 goto out;

-        new_missed_snapinfo->node_snap_info = NULL;
         INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps);
         INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops);

@@ -10116,7 +10128,6 @@ glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op)
         if (!new_snap_op)
                 goto out;

-        new_snap_op->brick_path = NULL;
         new_snap_op->brick_num = -1;
         new_snap_op->op = -1;
         new_snap_op->status = -1;
--
cgit
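For reference, a standalone sketch of how one missed_snaps_list entry is
rendered with the new "%s:%s=%s:%d:%s:%d:%d" format string from the hunk
at offset 2693 above, using the sample values from the commit message.
The main() wrapper and hard-coded arguments are illustrative only; the
real code fills these from glusterd_missed_snap_info and
glusterd_snap_op_t and stores the value in a dict.

    #include <stdio.h>

    int
    main (void)
    {
            char value[8192] = "";

            snprintf (value, sizeof (value), "%s:%s=%s:%d:%s:%d:%d",
                      "927cb5fe-63da-48f5-82f6-e6a09ddc81c4", /* node_uuid   */
                      "8258b18f-d408-483d-8239-204039dc6397", /* snap_uuid   */
                      "a17b4fe42c5a45f7a916438643edaa13",     /* snap_vol_id */
                      3,                                      /* brick_num   */
                      "/brick/brick-dirs/brick3",             /* brick_path  */
                      1,                                      /* op          */
                      1);                          /* status; 2 means done   */

            /* Prints one line in the on-disk format shown in the commit
             * message: NODE-UUID:SNAP-UUID=SNAP-VOL-ID:3:...:1:1 */
            puts (value);
            return 0;
    }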