diff options
author | Avra Sengupta <asengupt@redhat.com> | 2014-03-13 01:04:40 +0000 |
---|---|---|
committer | Rajesh Joseph <rjoseph@redhat.com> | 2014-04-02 06:03:25 -0700 |
commit | 0ce369a0aa511e98fd71c0337181a5577b2d8a1f (patch) | |
tree | 842fa4958e10a786572d22b81af2cd0813569da0 /xlators/mgmt/glusterd/src/glusterd-snapshot.c | |
parent | ee4e8bb5339f5517d3d248f559becfd58013a0fe (diff) |
glusterd/snapshot: Making snap operations crash consistent
Make snap operations such as create, delete, restore, and
status crash consistent in the event of a volume's brick or a
node being down.
Mark the snap status as -1 for snap bricks that were not
snapshotted because the volume brick was down, and do not start
those snap bricks until the snapshot is taken.
During delete, bypass the lvm snapshot remove for snap bricks
whose snap status is -1.
During restore, bypass replacing xattrs on the snapshot bricks
whose snap status is -1. Also bump the restored volume's version
so as to handle nodes being down. On handshake of a restored
volume, pass the brick's snap_status as well.
During snapshot status, display "N/A" for the details of
non-snapshotted bricks. If a node is down, the entry itself will
not be displayed.
Change-Id: Id042efd7507829995270da0b2b2a6282a08a053d
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/7241
Reviewed-by: Vijaikumar Mallikarjuna <vmallika@redhat.com>
Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
Tested-by: Rajesh Joseph <rjoseph@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-snapshot.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot.c | 145 |
1 files changed, 119 insertions, 26 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index 7711bd61a..256c34e9b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -438,9 +438,9 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) ret = dict_get_ptr (src, key, (void **)&snap_brick_dir); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to fetch snap brick dir"); - goto out; + gf_log (this->name, GF_LOG_WARNING, + "Unable to fetch %s", key); + continue; } snprintf (key, sizeof(key) - 1, @@ -670,6 +670,16 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, continue; } + if (!glusterd_is_brick_started (brickinfo)) { + gf_log (this->name, GF_LOG_WARNING, + "brick %s:%s is not started", + brickinfo->hostname, + brickinfo->path); + brick_order++; + brick_count++; + continue; + } + device = glusterd_get_brick_mount_details (brickinfo); if (!device) { snprintf (err_str, sizeof (err_str), @@ -988,6 +998,15 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol) if (uuid_compare (brickinfo->uuid, MY_UUID)) continue; + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "snapshot was pending. 
lvm not present " + "for brick %s:%s of the snap %s.", + brickinfo->hostname, brickinfo->path, + snap_vol->snapshot->snapname); + continue; + } + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); if (ret) { gf_log (this->name, GF_LOG_WARNING, "getting the root " @@ -2730,13 +2749,13 @@ out: static int32_t glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, char **snap_brick_dir, int64_t volcount, int32_t brick_count) { char key[PATH_MAX] = ""; char snap_brick_path[PATH_MAX] = ""; char *snap_device = NULL; - glusterd_brickinfo_t *snap_brickinfo = NULL; int32_t ret = -1; xlator_t *this = NULL; @@ -2745,25 +2764,29 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, GF_ASSERT (dict); GF_ASSERT (snap_vol); GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); GF_ASSERT (snap_brick_dir); - ret = glusterd_brickinfo_new (&snap_brickinfo); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "initializing the brick for the snap " - "volume failed (snapname: %s)", - snap_vol->snapshot->snapname); - goto out; - } - snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount, brick_count); ret = dict_get_ptr (dict, key, (void **)snap_brick_dir); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to fetch " - "snap mount path (%s)", key); - GF_FREE (snap_brickinfo); - goto out; + /* Using original brickinfo here because it will be a + * pending snapshot and storing the original brickinfo + * will help in mapping while recreating the missed snapshot + */ + gf_log (this->name, GF_LOG_WARNING, "Unable to fetch " + "snap mount path (%s). 
Using original brickinfo", key); + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + } else { + /* Create brick-path in the format /var/run/gluster/snaps/ * + * <snap-uuid>/<original-brick#>/snap-brick-dir * + */ + snprintf (snap_brick_path, sizeof(snap_brick_path), + "%s/%s/brick%d%s", snap_mount_folder, + snap_vol->volname, brick_count+1, + *snap_brick_dir); } snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d", @@ -2771,10 +2794,9 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, ret = dict_get_ptr (dict, key, (void **)&snap_device); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to fetch " - "snap device (%s)", key); - GF_FREE (snap_brickinfo); - goto out; - } + "snap device (%s). Leaving empty", key); + } else + strcpy (snap_brickinfo->device_path, snap_device); /* Create brick-path in the format /var/run/gluster/snaps/ * * <snap-uuid>/<original-brick#>/snap-brick-dir * @@ -2793,7 +2815,6 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, strcpy (snap_brickinfo->hostname, original_brickinfo->hostname); strcpy (snap_brickinfo->path, snap_brick_path); - strcpy (snap_brickinfo->device_path, snap_device); uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid); list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks); @@ -2806,6 +2827,7 @@ static int32_t glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, glusterd_volinfo_t *snap_vol, glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, char *snap_brick_dir, int32_t brick_count) { char *device = NULL; @@ -2817,6 +2839,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, GF_ASSERT (origin_vol); GF_ASSERT (snap_vol); GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); GF_ASSERT (snap_brick_dir); if (!glusterd_is_brick_started (original_brickinfo)) { @@ -2826,6 +2849,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, 
original_brickinfo->path, origin_vol->volname, snap_vol->snapshot->snapname); + snap_brickinfo->snap_status = -1; ret = 0; goto out; } @@ -2879,6 +2903,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, uuid_t *snap_volid = NULL; int32_t ret = -1; int32_t brick_count = 0; + glusterd_brickinfo_t *snap_brickinfo = NULL; xlator_t *this = NULL; this = THIS; @@ -2939,9 +2964,20 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, /* Adding snap brickinfos to the snap volinfo */ brick_count = 0; list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) { + snap_brickinfo = NULL; + + ret = glusterd_brickinfo_new (&snap_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "initializing the brick for the snap " + "volume failed (snapname: %s)", snap->snapname); + goto out; + } + ret = glusterd_add_bricks_to_snap_volume (dict, snap_vol, brickinfo, + snap_brickinfo, &snap_brick_dir, volcount, brick_count); @@ -2950,11 +2986,13 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, "Failed to add the snap brick for " "%s:%s to the snap volume", brickinfo->hostname, brickinfo->path); + GF_FREE (snap_brickinfo); goto out; } /* Take snapshot of the brick */ - if (uuid_compare (brickinfo->uuid, MY_UUID)) { + if ((uuid_compare (brickinfo->uuid, MY_UUID)) || + (snap_brickinfo->snap_status == -1)) { brick_count++; continue; } @@ -2962,6 +3000,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, ret = glusterd_take_brick_snapshot (origin_vol, snap_vol, brickinfo, + snap_brickinfo, snap_brick_dir, brick_count); if (ret) { @@ -3021,6 +3060,15 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, if (uuid_compare (brickinfo->uuid, MY_UUID)) continue; + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "not starting snap brick %s:%s for " + "for the snap %s (volume: %s)", + brickinfo->hostname, brickinfo->path, + 
snap->snapname, origin_vol->volname); + continue; + } + ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true); if (ret) { gf_log (this->name, GF_LOG_WARNING, "starting the " @@ -3874,8 +3922,17 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict, break; token = strtok (buf, ":"); if (token != NULL) { + while (token && token[0] == ' ') + token++; + if (!token) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Invalid vg entry"); + goto end; + } value = gf_strdup (token); if (!value) { + ret = -1; goto end; } ret = snprintf (key, sizeof (key), "%s.vgname", @@ -3896,6 +3953,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict, if (token != NULL) { value = gf_strdup (token); if (!value) { + ret = -1; goto end; } ret = snprintf (key, sizeof (key), "%s.data", @@ -3915,6 +3973,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict, if (token != NULL) { value = gf_strdup (token); if (!value) { + ret = -1; goto end; } ret = snprintf (key, sizeof (key), "%s.lvsize", @@ -3987,16 +4046,38 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict, value = gf_strdup (brick_path); if (!value) { + ret = -1; goto out; } ret = dict_set_dynstr (rsp_dict, key, value); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to store " + gf_log (this->name, GF_LOG_ERROR, "Unable to store " "brick_path %s", brickinfo->path); goto out; } + if (brickinfo->snap_status == -1) { + /* Setting vgname as "Pending Snapshot" */ + value = gf_strdup ("Pending Snapshot"); + if (!value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.brick%d.vgname", + keyprefix, index); + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save vgname "); + goto out; + } + + ret = 0; + goto out; + } + /* Ownership has been transferred to dict*. 
Therefore we must * initialize this to NULL */ @@ -4009,19 +4090,31 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict, } if (brickinfo->status == GF_BRICK_STOPPED) { - ret = dict_set_str (rsp_dict, key, "No"); + value = gf_strdup ("No"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Could not save brick status"); goto out; } + value = NULL; } else { - ret = dict_set_str (rsp_dict, key, "Yes"); + value = gf_strdup ("Yes"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Could not save brick status"); goto out; } + value = NULL; GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo, brickinfo, priv); |