summary | refs | log | tree | commit | diff | stats
path: root/xlators/mgmt/glusterd/src/glusterd-snapshot.c
diff options
context:
space:
mode:
author: Avra Sengupta <asengupt@redhat.com> 2014-03-13 01:04:40 +0000
committer: Rajesh Joseph <rjoseph@redhat.com> 2014-04-02 06:03:25 -0700
commit: 0ce369a0aa511e98fd71c0337181a5577b2d8a1f (patch)
tree: 842fa4958e10a786572d22b81af2cd0813569da0 /xlators/mgmt/glusterd/src/glusterd-snapshot.c
parent: ee4e8bb5339f5517d3d248f559becfd58013a0fe (diff)
glusterd/snapshot: Making snap operations crash consistent
In the event of a volume's brick being down, or a node being down, make snap ops like create, delete, restore, and status crash consistent. Mark the snap status of snap bricks which were not snapshotted because the volume brick was down as -1, and do not start those snap bricks until the snapshot is taken. During delete, bypass lvm snapshot remove for snap bricks whose snap status is -1. During restore, bypass replacing xattrs on the snapshot bricks whose snap status is -1. Also bump the restored volume's version so as to handle nodes being down. On handshake of a restored volume, pass the brick's snap_status as well. During snapshot status, display "N/A" for the details of a non-snapshotted brick. If a node is down, the entry itself will not be displayed. Change-Id: Id042efd7507829995270da0b2b2a6282a08a053d Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/7241 Reviewed-by: Vijaikumar Mallikarjuna <vmallika@redhat.com> Reviewed-by: Rajesh Joseph <rjoseph@redhat.com> Tested-by: Rajesh Joseph <rjoseph@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-snapshot.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c145
1 files changed, 119 insertions, 26 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 7711bd61a..256c34e9b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -438,9 +438,9 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src)
ret = dict_get_ptr (src, key,
(void **)&snap_brick_dir);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to fetch snap brick dir");
- goto out;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to fetch %s", key);
+ continue;
}
snprintf (key, sizeof(key) - 1,
@@ -670,6 +670,16 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr,
continue;
}
+ if (!glusterd_is_brick_started (brickinfo)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "brick %s:%s is not started",
+ brickinfo->hostname,
+ brickinfo->path);
+ brick_order++;
+ brick_count++;
+ continue;
+ }
+
device = glusterd_get_brick_mount_details (brickinfo);
if (!device) {
snprintf (err_str, sizeof (err_str),
@@ -988,6 +998,15 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol)
if (uuid_compare (brickinfo->uuid, MY_UUID))
continue;
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "snapshot was pending. lvm not present "
+ "for brick %s:%s of the snap %s.",
+ brickinfo->hostname, brickinfo->path,
+ snap_vol->snapshot->snapname);
+ continue;
+ }
+
ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "getting the root "
@@ -2730,13 +2749,13 @@ out:
static int32_t
glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
glusterd_brickinfo_t *original_brickinfo,
+ glusterd_brickinfo_t *snap_brickinfo,
char **snap_brick_dir, int64_t volcount,
int32_t brick_count)
{
char key[PATH_MAX] = "";
char snap_brick_path[PATH_MAX] = "";
char *snap_device = NULL;
- glusterd_brickinfo_t *snap_brickinfo = NULL;
int32_t ret = -1;
xlator_t *this = NULL;
@@ -2745,25 +2764,29 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
GF_ASSERT (dict);
GF_ASSERT (snap_vol);
GF_ASSERT (original_brickinfo);
+ GF_ASSERT (snap_brickinfo);
GF_ASSERT (snap_brick_dir);
- ret = glusterd_brickinfo_new (&snap_brickinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "initializing the brick for the snap "
- "volume failed (snapname: %s)",
- snap_vol->snapshot->snapname);
- goto out;
- }
-
snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount,
brick_count);
ret = dict_get_ptr (dict, key, (void **)snap_brick_dir);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to fetch "
- "snap mount path (%s)", key);
- GF_FREE (snap_brickinfo);
- goto out;
+ /* Using original brickinfo here because it will be a
+ * pending snapshot and storing the original brickinfo
+ * will help in mapping while recreating the missed snapshot
+ */
+ gf_log (this->name, GF_LOG_WARNING, "Unable to fetch "
+ "snap mount path (%s). Using original brickinfo", key);
+ snap_brickinfo->snap_status = -1;
+ strcpy (snap_brick_path, original_brickinfo->path);
+ } else {
+ /* Create brick-path in the format /var/run/gluster/snaps/ *
+ * <snap-uuid>/<original-brick#>/snap-brick-dir *
+ */
+ snprintf (snap_brick_path, sizeof(snap_brick_path),
+ "%s/%s/brick%d%s", snap_mount_folder,
+ snap_vol->volname, brick_count+1,
+ *snap_brick_dir);
}
snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d",
@@ -2771,10 +2794,9 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
ret = dict_get_ptr (dict, key, (void **)&snap_device);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Unable to fetch "
- "snap device (%s)", key);
- GF_FREE (snap_brickinfo);
- goto out;
- }
+ "snap device (%s). Leaving empty", key);
+ } else
+ strcpy (snap_brickinfo->device_path, snap_device);
/* Create brick-path in the format /var/run/gluster/snaps/ *
* <snap-uuid>/<original-brick#>/snap-brick-dir *
@@ -2793,7 +2815,6 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
strcpy (snap_brickinfo->hostname, original_brickinfo->hostname);
strcpy (snap_brickinfo->path, snap_brick_path);
- strcpy (snap_brickinfo->device_path, snap_device);
uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid);
list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks);
@@ -2806,6 +2827,7 @@ static int32_t
glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
glusterd_volinfo_t *snap_vol,
glusterd_brickinfo_t *original_brickinfo,
+ glusterd_brickinfo_t *snap_brickinfo,
char *snap_brick_dir, int32_t brick_count)
{
char *device = NULL;
@@ -2817,6 +2839,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
GF_ASSERT (origin_vol);
GF_ASSERT (snap_vol);
GF_ASSERT (original_brickinfo);
+ GF_ASSERT (snap_brickinfo);
GF_ASSERT (snap_brick_dir);
if (!glusterd_is_brick_started (original_brickinfo)) {
@@ -2826,6 +2849,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
original_brickinfo->path,
origin_vol->volname,
snap_vol->snapshot->snapname);
+ snap_brickinfo->snap_status = -1;
ret = 0;
goto out;
}
@@ -2879,6 +2903,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
uuid_t *snap_volid = NULL;
int32_t ret = -1;
int32_t brick_count = 0;
+ glusterd_brickinfo_t *snap_brickinfo = NULL;
xlator_t *this = NULL;
this = THIS;
@@ -2939,9 +2964,20 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
/* Adding snap brickinfos to the snap volinfo */
brick_count = 0;
list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) {
+ snap_brickinfo = NULL;
+
+ ret = glusterd_brickinfo_new (&snap_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "initializing the brick for the snap "
+ "volume failed (snapname: %s)", snap->snapname);
+ goto out;
+ }
+
ret = glusterd_add_bricks_to_snap_volume (dict,
snap_vol,
brickinfo,
+ snap_brickinfo,
&snap_brick_dir,
volcount,
brick_count);
@@ -2950,11 +2986,13 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
"Failed to add the snap brick for "
"%s:%s to the snap volume",
brickinfo->hostname, brickinfo->path);
+ GF_FREE (snap_brickinfo);
goto out;
}
/* Take snapshot of the brick */
- if (uuid_compare (brickinfo->uuid, MY_UUID)) {
+ if ((uuid_compare (brickinfo->uuid, MY_UUID)) ||
+ (snap_brickinfo->snap_status == -1)) {
brick_count++;
continue;
}
@@ -2962,6 +3000,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
ret = glusterd_take_brick_snapshot (origin_vol,
snap_vol,
brickinfo,
+ snap_brickinfo,
snap_brick_dir,
brick_count);
if (ret) {
@@ -3021,6 +3060,15 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
if (uuid_compare (brickinfo->uuid, MY_UUID))
continue;
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "not starting snap brick %s:%s for "
+ "for the snap %s (volume: %s)",
+ brickinfo->hostname, brickinfo->path,
+ snap->snapname, origin_vol->volname);
+ continue;
+ }
+
ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "starting the "
@@ -3874,8 +3922,17 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict,
break;
token = strtok (buf, ":");
if (token != NULL) {
+ while (token && token[0] == ' ')
+ token++;
+ if (!token) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid vg entry");
+ goto end;
+ }
value = gf_strdup (token);
if (!value) {
+ ret = -1;
goto end;
}
ret = snprintf (key, sizeof (key), "%s.vgname",
@@ -3896,6 +3953,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup (token);
if (!value) {
+ ret = -1;
goto end;
}
ret = snprintf (key, sizeof (key), "%s.data",
@@ -3915,6 +3973,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup (token);
if (!value) {
+ ret = -1;
goto end;
}
ret = snprintf (key, sizeof (key), "%s.lvsize",
@@ -3987,16 +4046,38 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
value = gf_strdup (brick_path);
if (!value) {
+ ret = -1;
goto out;
}
ret = dict_set_dynstr (rsp_dict, key, value);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to store "
+ gf_log (this->name, GF_LOG_ERROR, "Unable to store "
"brick_path %s", brickinfo->path);
goto out;
}
+ if (brickinfo->snap_status == -1) {
+ /* Setting vgname as "Pending Snapshot" */
+ value = gf_strdup ("Pending Snapshot");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.brick%d.vgname",
+ keyprefix, index);
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save vgname ");
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+
/* Ownership has been transferred to dict*. Therefore we must
* initialize this to NULL
*/
@@ -4009,19 +4090,31 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
}
if (brickinfo->status == GF_BRICK_STOPPED) {
- ret = dict_set_str (rsp_dict, key, "No");
+ value = gf_strdup ("No");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (rsp_dict, key, value);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Could not save brick status");
goto out;
}
+ value = NULL;
} else {
- ret = dict_set_str (rsp_dict, key, "Yes");
+ value = gf_strdup ("Yes");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (rsp_dict, key, value);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Could not save brick status");
goto out;
}
+ value = NULL;
GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo,
brickinfo, priv);