summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src
diff options
context:
space:
mode:
authorSusant Palai <spalai@redhat.com>2018-01-18 13:06:12 +0530
committerRaghavendra G <rgowdapp@redhat.com>2018-02-02 15:24:38 +0000
commit545a7ce6762a1b3a7b989b43a9d18b5b1b299df0 (patch)
tree0f2c3015697553914cb520dbda107f3843521f53 /xlators/cluster/dht/src
parentd9f773ba719397c12860f494a8cd38109e4b2fe3 (diff)
cluster/dht: avoid overwriting client writes during migration
For more details on this issue see https://github.com/gluster/glusterfs/issues/308 Solution: This is a restrictive solution where a file will not be migrated if a client writes to it during the migration. This does not check if the writes from the rebalance and the client actually do overlap. If dht_writev_cbk finds that the file is being migrated (PHASE1) it will set an xattr on the destination file indicating the file was updated by a non-rebalance client. Rebalance checks if any other client has written to the dst file and aborts the file migration if it finds the xattr. updates gluster/glusterfs#308 Change-Id: I73aec28bc9dbb8da57c7425ec88c6b6af0fbc9dd Signed-off-by: Susant Palai <spalai@redhat.com> Signed-off-by: Raghavendra G <rgowdapp@redhat.com> Signed-off-by: N Balachandran <nbalacha@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r--xlators/cluster/dht/src/dht-common.h2
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c28
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c103
-rw-r--r--xlators/cluster/dht/src/dht-shared.c17
4 files changed, 138 insertions, 12 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 91ba3418643..9671bbe1cbe 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -695,6 +695,8 @@ struct dht_conf {
synclock_t link_lock;
gf_boolean_t use_fallocate;
+
+ gf_boolean_t force_migration;
};
typedef struct dht_conf dht_conf_t;
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 7c596b1c099..226ee95e8b3 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -95,6 +95,33 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ if (!dht_is_tier_xlator (this)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ DHT_MSG_NO_MEMORY,
+ ENOMEM, "insufficient memory");
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+ }
+
+ ret = dict_set_uint32 (local->xattr_req,
+ GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR,
+ DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+ }
+
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
@@ -146,7 +173,6 @@ dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
return 0;
}
-
if (subvol == NULL)
goto out;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 70d5a5f316f..d9c3149049c 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -26,7 +26,8 @@
#define MAX_REBAL_TYPE_SIZE 16
#define FILE_CNT_INTERVAL 600 /* 10 mins */
#define ESTIMATE_START_INTERVAL 600 /* 10 mins */
-
+#define HARDLINK_MIG_INPROGRESS -2
+#define SKIP_MIGRATION_FD_POSITIVE -3
#ifndef MAX
#define MAX(a, b) (((a) > (b))?(a):(b))
#endif
@@ -680,6 +681,7 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
struct iatt check_stbuf= {0,};
dht_conf_t *conf = NULL;
dict_t *dict = NULL;
+ dict_t *xdata = NULL;
conf = this->private;
@@ -725,7 +727,31 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- ret = syncop_lookup (to, loc, &new_stbuf, NULL, NULL, NULL);
+ if (!!dht_is_tier_xlator (this)) {
+ xdata = dict_new ();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)",
+ loc->path);
+ goto out;
+ }
+
+ ret = dict_set_int32 (xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ",
+ loc->path, GF_CLEAN_WRITE_PROTECTION);
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup (to, loc, &new_stbuf, NULL, xdata, NULL);
if (!ret) {
/* File exits in the destination, check if gfid matches */
if (gf_uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
@@ -875,6 +901,10 @@ out:
if (dict)
dict_unref (dict);
+ if (xdata)
+ dict_unref (dict);
+
+
return ret;
}
@@ -1090,9 +1120,9 @@ out:
}
static int
-__dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from,
- xlator_t *to, fd_t *src, fd_t *dst,
- uint64_t ia_size, int hole_exists,
+__dht_rebalance_migrate_data (xlator_t *this, gf_defrag_info_t *defrag,
+ xlator_t *from, xlator_t *to, fd_t *src,
+ fd_t *dst, uint64_t ia_size, int hole_exists,
int *fop_errno)
{
int ret = 0;
@@ -1102,7 +1132,10 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from,
struct iobref *iobref = NULL;
uint64_t total = 0;
size_t read_size = 0;
+ dict_t *xdata = NULL;
+ dht_conf_t *conf = NULL;
+ conf = this->private;
/* if file size is '0', no need to enter this loop */
while (total < ia_size) {
read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
@@ -1121,8 +1154,42 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from,
ret, offset, iobref,
fop_errno);
} else {
+ if (!conf->force_migration &&
+ !dht_is_tier_xlator (this)) {
+ xdata = dict_new ();
+ if (!xdata) {
+ gf_msg ("dht", GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "insufficient memory");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
+
+ /* Fail this write and abort rebalance if we
+ * detect a write from client since migration of
+ * this file started. This is done to avoid
+ * potential data corruption due to out of order
+ * writes from rebalance and client to the same
+ * region (as compared between src and dst
+ * files). See
+ * https://github.com/gluster/glusterfs/issues/308
+ * for more details.
+ */
+ ret = dict_set_int32 (xdata,
+ GF_AVOID_OVERWRITE, 1);
+ if (ret) {
+ gf_msg ("dht", GF_LOG_ERROR, 0,
+ ENOMEM, "failed to set dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
+
+ }
+
ret = syncop_writev (to, dst, vector, count,
- offset, iobref, 0, NULL, NULL);
+ offset, iobref, 0, xdata, NULL);
if (ret < 0) {
*fop_errno = -ret;
}
@@ -1158,6 +1225,10 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from,
else
ret = -1;
+ if (xdata) {
+ dict_unref (xdata);
+ }
+
return ret;
}
@@ -1575,7 +1646,6 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
-
/* Do not migrate file in case lock migration is not enabled on the
* volume*/
if (!conf->lock_migration_enabled) {
@@ -1642,7 +1712,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag, conf,
fop_errno);
if (ret) {
- if (ret == -2)
+ if (ret == HARDLINK_MIG_INPROGRESS)
ret = 0;
goto out;
}
@@ -1785,7 +1855,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = __check_file_has_hardlink (this, loc, &stbuf, xattr_rsp,
flag, defrag, conf, fop_errno);
if (ret) {
- if (ret == -2)
+ if (ret == HARDLINK_MIG_INPROGRESS)
ret = 0;
goto out;
}
@@ -1794,8 +1864,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
file_has_holes = 1;
- ret = __dht_rebalance_migrate_data (defrag, from, to, src_fd, dst_fd,
- stbuf.ia_size,
+ ret = __dht_rebalance_migrate_data (this, defrag, from, to,
+ src_fd, dst_fd, stbuf.ia_size,
file_has_holes, fop_errno);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -2280,6 +2350,17 @@ out:
}
}
+ if (!dht_is_tier_xlator (this)) {
+ lk_ret = syncop_removexattr (to, loc,
+ GF_PROTECT_FROM_EXTERNAL_WRITES,
+ NULL, NULL);
+ if (lk_ret) {
+ gf_msg (this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ }
+ }
+
if (dict)
dict_unref (dict);
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 8c011f72530..51c9d9cb3cf 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -524,6 +524,10 @@ dht_reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled,
options, bool, out);
+ GF_OPTION_RECONF ("force-migration", conf->force_migration,
+ options, bool, out);
+
+
if (conf->defrag) {
if (dict_get_str (options, "rebal-throttle", &temp_str) == 0) {
ret = dht_configure_throttle (this, conf, temp_str);
@@ -810,6 +814,10 @@ dht_init (xlator_t *this)
GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled,
bool, err);
+ GF_OPTION_INIT ("force-migration", conf->force_migration,
+ bool, err);
+
+
if (defrag) {
defrag->lock_migration_enabled = conf->lock_migration_enabled;
@@ -1203,5 +1211,14 @@ struct volume_options options[] = {
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC
},
+ { .key = {"force-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If disabled, rebalance will not migrate files that "
+ "are being written to by an application",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+ },
+
{ .key = {NULL} },
};