summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src
diff options
context:
space:
mode:
authorAvra Sengupta <asengupt@redhat.com>2014-03-13 01:04:40 +0000
committerRajesh Joseph <rjoseph@redhat.com>2014-04-02 06:03:25 -0700
commit0ce369a0aa511e98fd71c0337181a5577b2d8a1f (patch)
tree842fa4958e10a786572d22b81af2cd0813569da0 /xlators/mgmt/glusterd/src
parentee4e8bb5339f5517d3d248f559becfd58013a0fe (diff)
glusterd/snapshot: Making snap operations crash consistent
In the event of a volume's brick being down, or a node being down, make snap ops like create, delete, restore, and status crash consistent. Mark the snap status of snap bricks which were not snapshotted because the volume brick was down as -1, and do not start those snap bricks till the snapshot is taken. During delete, bypass lvm snapshot remove for snap bricks whose snap status is -1. During restore, bypass replacing xattrs on the snapshot bricks whose snap status is -1. Also bump the restored volume's version so as to handle nodes being down. On handshake of a restored volume, pass the brick's snap_status as well. During snapshot status, display "N/A" for the details of a non-snapshotted brick. If a node is down, the entry itself will not be displayed. Change-Id: Id042efd7507829995270da0b2b2a6282a08a053d Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/7241 Reviewed-by: Vijaikumar Mallikarjuna <vmallika@redhat.com> Reviewed-by: Rajesh Joseph <rjoseph@redhat.com> Tested-by: Rajesh Joseph <rjoseph@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c145
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c5
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1
6 files changed, 183 insertions, 29 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 7711bd61a..256c34e9b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -438,9 +438,9 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src)
ret = dict_get_ptr (src, key,
(void **)&snap_brick_dir);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to fetch snap brick dir");
- goto out;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to fetch %s", key);
+ continue;
}
snprintf (key, sizeof(key) - 1,
@@ -670,6 +670,16 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr,
continue;
}
+ if (!glusterd_is_brick_started (brickinfo)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "brick %s:%s is not started",
+ brickinfo->hostname,
+ brickinfo->path);
+ brick_order++;
+ brick_count++;
+ continue;
+ }
+
device = glusterd_get_brick_mount_details (brickinfo);
if (!device) {
snprintf (err_str, sizeof (err_str),
@@ -988,6 +998,15 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol)
if (uuid_compare (brickinfo->uuid, MY_UUID))
continue;
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "snapshot was pending. lvm not present "
+ "for brick %s:%s of the snap %s.",
+ brickinfo->hostname, brickinfo->path,
+ snap_vol->snapshot->snapname);
+ continue;
+ }
+
ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "getting the root "
@@ -2730,13 +2749,13 @@ out:
static int32_t
glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
glusterd_brickinfo_t *original_brickinfo,
+ glusterd_brickinfo_t *snap_brickinfo,
char **snap_brick_dir, int64_t volcount,
int32_t brick_count)
{
char key[PATH_MAX] = "";
char snap_brick_path[PATH_MAX] = "";
char *snap_device = NULL;
- glusterd_brickinfo_t *snap_brickinfo = NULL;
int32_t ret = -1;
xlator_t *this = NULL;
@@ -2745,25 +2764,29 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
GF_ASSERT (dict);
GF_ASSERT (snap_vol);
GF_ASSERT (original_brickinfo);
+ GF_ASSERT (snap_brickinfo);
GF_ASSERT (snap_brick_dir);
- ret = glusterd_brickinfo_new (&snap_brickinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "initializing the brick for the snap "
- "volume failed (snapname: %s)",
- snap_vol->snapshot->snapname);
- goto out;
- }
-
snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount,
brick_count);
ret = dict_get_ptr (dict, key, (void **)snap_brick_dir);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to fetch "
- "snap mount path (%s)", key);
- GF_FREE (snap_brickinfo);
- goto out;
+ /* Using original brickinfo here because it will be a
+ * pending snapshot and storing the original brickinfo
+ * will help in mapping while recreating the missed snapshot
+ */
+ gf_log (this->name, GF_LOG_WARNING, "Unable to fetch "
+ "snap mount path (%s). Using original brickinfo", key);
+ snap_brickinfo->snap_status = -1;
+ strcpy (snap_brick_path, original_brickinfo->path);
+ } else {
+ /* Create brick-path in the format /var/run/gluster/snaps/ *
+ * <snap-uuid>/<original-brick#>/snap-brick-dir *
+ */
+ snprintf (snap_brick_path, sizeof(snap_brick_path),
+ "%s/%s/brick%d%s", snap_mount_folder,
+ snap_vol->volname, brick_count+1,
+ *snap_brick_dir);
}
snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d",
@@ -2771,10 +2794,9 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
ret = dict_get_ptr (dict, key, (void **)&snap_device);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Unable to fetch "
- "snap device (%s)", key);
- GF_FREE (snap_brickinfo);
- goto out;
- }
+ "snap device (%s). Leaving empty", key);
+ } else
+ strcpy (snap_brickinfo->device_path, snap_device);
/* Create brick-path in the format /var/run/gluster/snaps/ *
* <snap-uuid>/<original-brick#>/snap-brick-dir *
@@ -2793,7 +2815,6 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
strcpy (snap_brickinfo->hostname, original_brickinfo->hostname);
strcpy (snap_brickinfo->path, snap_brick_path);
- strcpy (snap_brickinfo->device_path, snap_device);
uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid);
list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks);
@@ -2806,6 +2827,7 @@ static int32_t
glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
glusterd_volinfo_t *snap_vol,
glusterd_brickinfo_t *original_brickinfo,
+ glusterd_brickinfo_t *snap_brickinfo,
char *snap_brick_dir, int32_t brick_count)
{
char *device = NULL;
@@ -2817,6 +2839,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
GF_ASSERT (origin_vol);
GF_ASSERT (snap_vol);
GF_ASSERT (original_brickinfo);
+ GF_ASSERT (snap_brickinfo);
GF_ASSERT (snap_brick_dir);
if (!glusterd_is_brick_started (original_brickinfo)) {
@@ -2826,6 +2849,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
original_brickinfo->path,
origin_vol->volname,
snap_vol->snapshot->snapname);
+ snap_brickinfo->snap_status = -1;
ret = 0;
goto out;
}
@@ -2879,6 +2903,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
uuid_t *snap_volid = NULL;
int32_t ret = -1;
int32_t brick_count = 0;
+ glusterd_brickinfo_t *snap_brickinfo = NULL;
xlator_t *this = NULL;
this = THIS;
@@ -2939,9 +2964,20 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
/* Adding snap brickinfos to the snap volinfo */
brick_count = 0;
list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) {
+ snap_brickinfo = NULL;
+
+ ret = glusterd_brickinfo_new (&snap_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "initializing the brick for the snap "
+ "volume failed (snapname: %s)", snap->snapname);
+ goto out;
+ }
+
ret = glusterd_add_bricks_to_snap_volume (dict,
snap_vol,
brickinfo,
+ snap_brickinfo,
&snap_brick_dir,
volcount,
brick_count);
@@ -2950,11 +2986,13 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
"Failed to add the snap brick for "
"%s:%s to the snap volume",
brickinfo->hostname, brickinfo->path);
+ GF_FREE (snap_brickinfo);
goto out;
}
/* Take snapshot of the brick */
- if (uuid_compare (brickinfo->uuid, MY_UUID)) {
+ if ((uuid_compare (brickinfo->uuid, MY_UUID)) ||
+ (snap_brickinfo->snap_status == -1)) {
brick_count++;
continue;
}
@@ -2962,6 +3000,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
ret = glusterd_take_brick_snapshot (origin_vol,
snap_vol,
brickinfo,
+ snap_brickinfo,
snap_brick_dir,
brick_count);
if (ret) {
@@ -3021,6 +3060,15 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
if (uuid_compare (brickinfo->uuid, MY_UUID))
continue;
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "not starting snap brick %s:%s for "
+ "for the snap %s (volume: %s)",
+ brickinfo->hostname, brickinfo->path,
+ snap->snapname, origin_vol->volname);
+ continue;
+ }
+
ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "starting the "
@@ -3874,8 +3922,17 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict,
break;
token = strtok (buf, ":");
if (token != NULL) {
+ while (token && token[0] == ' ')
+ token++;
+ if (!token) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid vg entry");
+ goto end;
+ }
value = gf_strdup (token);
if (!value) {
+ ret = -1;
goto end;
}
ret = snprintf (key, sizeof (key), "%s.vgname",
@@ -3896,6 +3953,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup (token);
if (!value) {
+ ret = -1;
goto end;
}
ret = snprintf (key, sizeof (key), "%s.data",
@@ -3915,6 +3973,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict,
if (token != NULL) {
value = gf_strdup (token);
if (!value) {
+ ret = -1;
goto end;
}
ret = snprintf (key, sizeof (key), "%s.lvsize",
@@ -3987,16 +4046,38 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
value = gf_strdup (brick_path);
if (!value) {
+ ret = -1;
goto out;
}
ret = dict_set_dynstr (rsp_dict, key, value);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to store "
+ gf_log (this->name, GF_LOG_ERROR, "Unable to store "
"brick_path %s", brickinfo->path);
goto out;
}
+ if (brickinfo->snap_status == -1) {
+ /* Setting vgname as "Pending Snapshot" */
+ value = gf_strdup ("Pending Snapshot");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.brick%d.vgname",
+ keyprefix, index);
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save vgname ");
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+
/* Ownership has been transferred to dict*. Therefore we must
* initialize this to NULL
*/
@@ -4009,19 +4090,31 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
}
if (brickinfo->status == GF_BRICK_STOPPED) {
- ret = dict_set_str (rsp_dict, key, "No");
+ value = gf_strdup ("No");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (rsp_dict, key, value);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Could not save brick status");
goto out;
}
+ value = NULL;
} else {
- ret = dict_set_str (rsp_dict, key, "Yes");
+ value = gf_strdup ("Yes");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (rsp_dict, key, value);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Could not save brick status");
goto out;
}
+ value = NULL;
GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo,
brickinfo, priv);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 48910fe3a..256519598 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -249,6 +249,12 @@ glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo)
goto out;
}
+ snprintf (value, sizeof(value), "%d", brickinfo->snap_status);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ value);
+ if (ret)
+ goto out;
+
if (!brickinfo->vg[0])
goto out;
@@ -1947,6 +1953,9 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
strlen (GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) {
strncpy (brickinfo->device_path, value,
sizeof (brickinfo->device_path));
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ strlen (GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) {
+ gf_string2int (value, &brickinfo->snap_status);
} else if (!strncmp (key,
GLUSTERD_STORE_KEY_BRICK_VGNAME,
strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 85ef4fec0..e1efafcc4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -77,6 +77,7 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned"
#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg"
#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path"
+#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status"
#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 8eec06c1a..a969c4c84 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -571,7 +571,7 @@ glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo,
}
}
dup_brickinfo->status = brickinfo->status;
-
+ dup_brickinfo->snap_status = brickinfo->snap_status;
out:
return ret;
}
@@ -618,7 +618,10 @@ glusterd_snap_volinfo_restore (glusterd_volinfo_t *new_volinfo,
goto out;
}
- if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+ /* If the brick is not of this peer, or snapshot is missed *
+ * for the brick do not replace the xattr for it */
+ if ((!uuid_compare (brickinfo->uuid, MY_UUID)) &&
+ (brickinfo->snap_status != -1)) {
/* We need to replace the volume id of all the bricks
* to the volume id of the origin volume. new_volinfo
* has the origin volume's volume id*/
@@ -1566,6 +1569,16 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
priv = this->private;
GF_ASSERT (priv);
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Snapshot is pending on %s:%s. "
+ "Hence not starting the brick",
+ brickinfo->hostname,
+ brickinfo->path);
+ ret = 0;
+ goto out;
+ }
+
ret = _mk_rundir_p (volinfo);
if (ret)
goto out;
@@ -2077,7 +2090,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
glusterd_dict_ctx_t ctx = {0};
char *rebalance_id_str = NULL;
char *rb_id_str = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT (this);
GF_ASSERT (dict);
GF_ASSERT (volinfo);
@@ -2315,6 +2331,17 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ snprintf (key, sizeof (key), "volume%d.brick%d.snap_status",
+ count, i);
+ ret = dict_set_int32 (dict, key, brickinfo->snap_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set snap_status for %s:%s",
+ brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
i++;
}
@@ -2833,6 +2860,7 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count,
{
char key[512] = {0,};
int ret = -1;
+ int32_t snap_status = 0;
char *hostname = NULL;
char *path = NULL;
glusterd_brickinfo_t *new_brickinfo = NULL;
@@ -2860,12 +2888,22 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count,
goto out;
}
+ snprintf (key, sizeof (key), "volume%d.brick%d.snap_status",
+ vol_count, brick_count);
+ ret = dict_get_int32 (vols, key, &snap_status);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload", key);
+ goto out;
+ }
+
ret = glusterd_brickinfo_new (&new_brickinfo);
if (ret)
goto out;
strcpy (new_brickinfo->path, path);
strcpy (new_brickinfo->hostname, hostname);
+ new_brickinfo->snap_status = snap_status;
+
//peerinfo might not be added yet
(void) glusterd_resolve_brick (new_brickinfo);
ret = 0;
@@ -3076,6 +3114,15 @@ glusterd_import_volinfo (dict_t *vols, int count,
new_volinfo->is_snap_volume = is_snap_volume;
+ snprintf (key, sizeof (key), "volume%d.is_volume_restored", count);
+ ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get "
+ "is_volume_restored option for %s",
+ volname);
+ goto out;
+ }
+
snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count);
ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index b104971d8..9855dc15f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -4154,6 +4154,10 @@ gd_restore_snap_volume (glusterd_volinfo_t *orig_vol,
new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit;
new_volinfo->is_volume_restored = _gf_true;
+ /* Bump the version of the restored volume, so that nodes *
+ * which are down can sync during handshake */
+ new_volinfo->version = orig_vol->version;
+
list_for_each_entry_safe (voliter, temp_volinfo,
&orig_vol->snap_volumes, snapvol_list) {
list_add_tail (&voliter->snapvol_list,
@@ -4211,7 +4215,6 @@ gd_restore_snap_volume (glusterd_volinfo_t *orig_vol,
goto out;
}
-
ret = 0;
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 4634c1c4e..027732920 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -190,6 +190,7 @@ struct glusterd_brickinfo {
int decommissioned;
char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */
int caps; /* Capability */
+ int32_t snap_status;
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;