From bf227251eadcc35a102fc9db0c39e36b7336954d Mon Sep 17 00:00:00 2001 From: Avra Sengupta Date: Tue, 13 Jan 2015 09:31:18 +0000 Subject: glusterd/snap: Fix restore cleanup If restore commit is successful on the originator and a few nodes, but fails on some other node, restore cleanup should restate the volume and the snapshot in question as it was before the command was run. Change-Id: I7bb0becc7f052f55bc818018bc84770944e76c80 BUG: 1181418 Signed-off-by: Avra Sengupta Reviewed-on: http://review.gluster.org/9441 Tested-by: Gluster Build System Reviewed-by: Rajesh Joseph Reviewed-by: Atin Mukherjee Reviewed-by: Kaushal M --- .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 60 ++++++++++--- .../mgmt/glusterd/src/glusterd-snapshot-utils.h | 2 +- xlators/mgmt/glusterd/src/glusterd-snapshot.c | 99 ++++++++++++---------- 3 files changed, 101 insertions(+), 60 deletions(-) (limited to 'xlators/mgmt') diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c index 8603e9e950c..33bcf6e7df0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -867,14 +867,15 @@ out: int32_t glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op) { - dict_t *dict = NULL; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *snap_volinfo = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_volinfo_t *tmp = NULL; - xlator_t *this = NULL; - uuid_t null_uuid = {0}; + dict_t *dict = NULL; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + uuid_t null_uuid = {0}; + char *parent_volname = NULL; this = THIS; GF_ASSERT (this); @@ -903,13 +904,16 @@ glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op) case GF_SNAP_OPTION_TYPE_RESTORE: list_for_each_entry_safe (snap_volinfo, tmp, &snap->volumes, vol_list) { - ret = glusterd_volinfo_find - (snap_volinfo->parent_volname, - &volinfo); + parent_volname = gf_strdup + (snap_volinfo->parent_volname); + if (!parent_volname) + goto out; + + ret = glusterd_volinfo_find (parent_volname, &volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of %s", - snap_volinfo->parent_volname); + parent_volname); goto out; } @@ -933,15 +937,38 @@ glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op) goto out; } - ret = glusterd_snapshot_restore_cleanup (dict, volinfo, + /* Restore is successful therefore delete the original + * volume's volinfo. If the volinfo is already restored + * then we should delete the backend LVMs */ + if (!uuid_is_null (volinfo->restored_from_snap)) { + ret = glusterd_lvm_snapshot_remove (dict, + volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to remove LVM backend"); + goto out; + } + } + + /* Detach the volinfo from priv->volumes, so that no new + * command can ref it any more and then unref it. + */ + list_del_init (&volinfo->vol_list); + glusterd_volinfo_unref (volinfo); + + ret = glusterd_snapshot_restore_cleanup (dict, + parent_volname, snap); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to perform snapshot restore " "cleanup for %s volume", - snap_volinfo->parent_volname); + parent_volname); goto out; } + + GF_FREE (parent_volname); + parent_volname = NULL; } break; @@ -956,6 +983,11 @@ glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op) out: dict_unref (dict); + if (parent_volname) { + GF_FREE (parent_volname); + parent_volname = NULL; + } + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h index a5d82af559d..86b5e7443b1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h @@ -134,7 +134,7 @@ glusterd_snap_brick_create (glusterd_volinfo_t *snap_volinfo, int glusterd_snapshot_restore_cleanup (dict_t *rsp_dict, - glusterd_volinfo_t *volinfo, + char *volname, glusterd_snap_t *snap); int diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index f1e5fbb7dda..322da605214 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -836,6 +836,25 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) "snap for %s", snapname); goto out; } + + /* Restore is successful therefore delete the original volume's + * volinfo. If the volinfo is already restored then we should + * delete the backend LVMs */ + if (!uuid_is_null (parent_volinfo->restored_from_snap)) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, + parent_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to remove LVM backend"); + goto out; + } + } + + /* Detach the volinfo from priv->volumes, so that no new + * command can ref it any more and then unref it. + */ + list_del_init (&parent_volinfo->vol_list); + glusterd_volinfo_unref (parent_volinfo); } ret = 0; @@ -7159,7 +7178,7 @@ out: */ int glusterd_snapshot_restore_cleanup (dict_t *rsp_dict, - glusterd_volinfo_t *volinfo, + char *volname, glusterd_snap_t *snap) { int ret = -1; @@ -7172,32 +7191,12 @@ glusterd_snapshot_restore_cleanup (dict_t *rsp_dict, priv = this->private; GF_ASSERT (rsp_dict); - GF_ASSERT (volinfo); + GF_ASSERT (volname); GF_ASSERT (snap); - /* If the volinfo is already restored then we should delete - * the backend LVMs */ - if (!uuid_is_null (volinfo->restored_from_snap)) { - ret = glusterd_lvm_snapshot_remove (rsp_dict, volinfo); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to remove " - "LVM backend"); - goto out; - } - } - snprintf (delete_path, sizeof (delete_path), "%s/"GLUSTERD_TRASH"/vols-%s.deleted", priv->workdir, - volinfo->volname); - - /* Restore is successful therefore delete the original volume's - * volinfo. - */ - ret = glusterd_volinfo_delete (volinfo); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); - goto out; - } + volname); /* Now delete the snap entry. */ ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); @@ -7224,19 +7223,18 @@ out: * for some reasons. In such case we revert the restore operation. * * @param volinfo volinfo of the origin volume - * @param restore_from_store Boolean variable which tells whether to - * restore the origin from store or not. * * @return 0 on success and -1 on failure */ int -glusterd_snapshot_revert_partial_restored_vol (glusterd_volinfo_t *volinfo, - gf_boolean_t restore_from_store) +glusterd_snapshot_revert_partial_restored_vol (glusterd_volinfo_t *volinfo) { int ret = 0; char pathname [PATH_MAX] = {0,}; char trash_path[PATH_MAX] = {0,}; glusterd_volinfo_t *reverted_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; glusterd_conf_t *priv = NULL; xlator_t *this = NULL; @@ -7271,13 +7269,6 @@ glusterd_snapshot_revert_partial_restored_vol (glusterd_volinfo_t *volinfo, goto out; } - /* Skip the volinfo retrieval from the store if restore_from_store - * is not true. */ - if (!restore_from_store) { - ret = 0; - goto out; - } - /* Retrieve the volume from the store */ reverted_vol = glusterd_store_retrieve_volume (volinfo->volname, NULL); if (NULL == reverted_vol) { @@ -7286,6 +7277,14 @@ glusterd_snapshot_revert_partial_restored_vol (glusterd_volinfo_t *volinfo, goto out; } + /* Retrieve the snap_volumes list from the older volinfo */ + reverted_vol->snap_count = volinfo->snap_count; + list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) { + list_add_tail (&snap_vol->snapvol_list, + &reverted_vol->snap_volumes); + } + /* Since we retrieved the volinfo from store now we don't * want the older volinfo. Therefore delete the older volinfo */ ret = glusterd_volinfo_delete (volinfo); @@ -7335,7 +7334,7 @@ glusterd_snapshot_revert_restore_from_snap (glusterd_snap_t *snap) goto out; } - ret = glusterd_snapshot_revert_partial_restored_vol (volinfo, _gf_true); + ret = glusterd_snapshot_revert_partial_restored_vol (volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to revert snapshot " "restore operation for %s volume", volname); @@ -7420,7 +7419,8 @@ glusterd_snapshot_restore_postop (dict_t *dict, int32_t op_ret, /* On success perform the cleanup operation */ if (0 == op_ret) { - ret = glusterd_snapshot_restore_cleanup (rsp_dict, volinfo, + ret = glusterd_snapshot_restore_cleanup (rsp_dict, + volname, snap); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to perform " @@ -7436,13 +7436,22 @@ glusterd_snapshot_restore_postop (dict_t *dict, int32_t op_ret, goto out; } - ret = glusterd_snapshot_revert_partial_restored_vol (volinfo, - _gf_false); + ret = glusterd_snapshot_revert_partial_restored_vol (volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to revert " "restore operation for %s volume", volname); goto out; } + + snap->snap_status = GD_SNAP_STATUS_IN_USE; + /* We need to save this in disk */ + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not store snap object for %s snap", + snap->snapname); + goto out; + } } ret = 0; @@ -8197,15 +8206,9 @@ gd_restore_snap_volume (dict_t *dict, dict_t *rsp_dict, uuid_copy (new_volinfo->restored_from_snap, snap_vol->snapshot->snap_id); - /* Bump the version of the restored volume, so that nodes * - * which are done can sync during handshake */ + /* Use the same version as the original version */ new_volinfo->version = orig_vol->version; - list_for_each_entry_safe (voliter, temp_volinfo, - &orig_vol->snap_volumes, snapvol_list) { - list_add_tail (&voliter->snapvol_list, - &new_volinfo->snap_volumes); - } /* Copy the snap vol info to the new_volinfo.*/ ret = glusterd_snap_volinfo_restore (dict, rsp_dict, new_volinfo, snap_vol, volcount); @@ -8251,6 +8254,12 @@ out: * if it was added there. */ (void)glusterd_volinfo_delete (new_volinfo); + } else { + list_for_each_entry_safe (voliter, temp_volinfo, + &orig_vol->snap_volumes, snapvol_list) { + list_add_tail (&voliter->snapvol_list, + &new_volinfo->snap_volumes); + } } return ret; -- cgit