From d7b3e068290c41b13ecd664771814202d7d26881 Mon Sep 17 00:00:00 2001
From: Avra Sengupta
Date: Mon, 7 Apr 2014 05:25:28 +0000
Subject: glusterd/snapshot: Adding snap_vol_id and snap_uuid to missed_snap_list

Persisting missed snapshot info on disk as well as in memory in the
following format:

-------------NODE-UUID--------------:--------------SNAP-UUID-------------=---------SNAP-VOL-ID------------:BRICKNUM:-------BRICKPATH--------:OPERATION:STATUS

927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 1 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 3 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=83a3cc05453b46b2a7eda4c9a9208638: 3 :/brick/brick-dirs/brick3: 1 : 1

This data will be stored on disk at /var/lib/glusterd/snaps/missed_snaps_list.
In memory we maintain the data as a list of glusterd_missed_snap_info in
conf; the key for this list is formed by the first two fields, i.e.
NODE-UUID:SNAP-UUID. For every NODE-UUID:SNAP-UUID, multiple operations
can be missed on multiple bricks, so we maintain a list of
glusterd_snap_op_t for every glusterd_missed_snap_info node.

This list is maintained and updated during snapshot create, delete, and
restore operations, which are the only operations that, if missed, are
recorded in this list.

During snapshot create, if a node or a brick is down, we don't receive
its mount point information. The snap_status of such bricks is marked
as -1, and their brick details are added to this list.

During snapshot delete, we check from the originator node whether any
other nodes holding bricks of the said snap are down; those are added
to the list. Also, if a node is up but the snapshot was pending for a
snap brick (its snap_status is -1), we add that brick to the list too.
When a subsequent delete entry is processed for an already existing
create entry, we just mark the create entry's status as done (2) and
don't add the delete entry to the list.

During snapshot restore, we perform the same checks as for delete:
bricks on downed nodes and bricks with snap_status -1 are added to the
list, and when a subsequent restore entry is processed for an already
existing create entry, we just mark the create entry's status as done
(2) and don't add the restore entry to the list.
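A minimal sketch, in C, of the in-memory layout described above. It is
inferred from the fields this patch references (node_uuid, snap_uuid,
snap_vol_id, brick_num, brick_path, op, status); the authoritative
definitions live in glusterd's headers and may differ in member types,
naming, and list implementation.

    #include <stdint.h>

    /* Stand-in for gluster's list head; the real code uses its own list.h
     * (see the INIT_LIST_HEAD calls in the patch below). */
    struct list_head {
            struct list_head *next;
            struct list_head *prev;
    };

    /* One missed operation on one brick of a snap volume. */
    typedef struct glusterd_snap_op_ {
            char             *snap_vol_id;   /* SNAP-VOL-ID field            */
            int32_t           brick_num;     /* BRICKNUM field               */
            char             *brick_path;    /* BRICKPATH field              */
            int32_t           op;            /* OPERATION field              */
            int32_t           status;        /* STATUS field; 2 means done   */
            struct list_head  snap_ops_list; /* linkage into snap_ops below  */
    } glusterd_snap_op_t;

    /* Keyed by NODE-UUID:SNAP-UUID; owns the list of missed operations. */
    typedef struct glusterd_missed_snap_ {
            char             *node_uuid;
            char             *snap_uuid;
            struct list_head  missed_snaps;  /* linkage into conf's list     */
            struct list_head  snap_ops;      /* list of glusterd_snap_op_t   */
    } glusterd_missed_snap_info;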
Change-Id: I54f63e28d3c40555d0f84528f38227103171f594
BUG: 1061685
Signed-off-by: Avra Sengupta
Reviewed-on: http://review.gluster.org/7454
Tested-by: Gluster Build System
Reviewed-by: Rajesh Joseph
Reviewed-by: Vijay Bellur
---
 xlators/mgmt/glusterd/src/glusterd-utils.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')

diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 74317eb44..b4644432d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -709,7 +709,7 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict,
                         /* Adding missed delete to the dict */
                         ret = glusterd_add_missed_snaps_to_dict
                                                    (rsp_dict,
-                                                    snap_volinfo->volname,
+                                                    snap_volinfo,
                                                     brickinfo,
                                                     brick_count + 1,
                                                     GF_SNAP_OPTION_TYPE_RESTORE);
@@ -2693,8 +2693,10 @@
                                    snap_ops_list) {
                         snprintf (name_buf, sizeof(name_buf),
                                   "missed_snaps_%d", missed_snap_count);
-                        snprintf (value, sizeof(value), "%s=%d:%s:%d:%d",
-                                  missed_snapinfo->node_snap_info,
+                        snprintf (value, sizeof(value), "%s:%s=%s:%d:%s:%d:%d",
+                                  missed_snapinfo->node_uuid,
+                                  missed_snapinfo->snap_uuid,
+                                  snap_opinfo->snap_vol_id,
                                   snap_opinfo->brick_num,
                                   snap_opinfo->brick_path,
                                   snap_opinfo->op,
@@ -3937,7 +3939,11 @@ glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo,
                                                   old_brickinfo->hostname,
                                                   old_brickinfo->path,
                                                   new_volinfo, &new_brickinfo);
-                if (ret) {
+                /* If the brick is stale, i.e it's not a part of the new volume
+                 * or if it's part of the new volume and is pending a snap,
+                 * then stop the brick process
+                 */
+                if (ret || (new_brickinfo->snap_status == -1)) {
                         /*TODO: may need to switch to 'atomic' flavour of
                          * brick_stop, once we make peer rpc program also
                          * synctask enabled*/
@@ -4240,7 +4246,14 @@ glusterd_import_friend_missed_snap_list (dict_t *vols)
                 goto out;
         }

-        ret = glusterd_store_update_missed_snaps (vols, missed_snap_count);
+        ret = glusterd_add_missed_snaps_to_list (vols, missed_snap_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to add missed snaps to list");
+                goto out;
+        }
+
+        ret = glusterd_store_update_missed_snaps ();
         if (ret) {
                 gf_log (this->name, GF_LOG_ERROR,
                         "Failed to update missed_snaps_list");
@@ -10086,7 +10099,6 @@ glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo)
         if (!new_missed_snapinfo)
                 goto out;

-        new_missed_snapinfo->node_snap_info = NULL;
         INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps);
         INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops);

@@ -10116,7 +10128,6 @@ glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op)
         if (!new_snap_op)
                 goto out;

-        new_snap_op->brick_path = NULL;
         new_snap_op->brick_num = -1;
         new_snap_op->op = -1;
         new_snap_op->status = -1;
--
cgit
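For reference, a standalone sketch of how one missed_snaps_list entry is
rendered with the new "%s:%s=%s:%d:%s:%d:%d" format string from the hunk
at offset 2693 above, using the sample values from the commit message.
The main() wrapper and hard-coded arguments are illustrative only; the
real code fills these from glusterd_missed_snap_info and
glusterd_snap_op_t and stores the value in a dict.

    #include <stdio.h>

    int
    main (void)
    {
            char value[8192] = "";

            snprintf (value, sizeof (value), "%s:%s=%s:%d:%s:%d:%d",
                      "927cb5fe-63da-48f5-82f6-e6a09ddc81c4", /* node_uuid   */
                      "8258b18f-d408-483d-8239-204039dc6397", /* snap_uuid   */
                      "a17b4fe42c5a45f7a916438643edaa13",     /* snap_vol_id */
                      3,                                      /* brick_num   */
                      "/brick/brick-dirs/brick3",             /* brick_path  */
                      1,                                      /* op          */
                      1);                          /* status; 2 means done   */

            /* Prints one line in the on-disk format shown in the commit
             * message: NODE-UUID:SNAP-UUID=SNAP-VOL-ID:3:...:1:1 */
            puts (value);
            return 0;
    }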