summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author shishir gowda <shishirng@gluster.com> 2012-02-17 19:21:07 +0530
committer Vijay Bellur <vijay@gluster.com> 2012-02-19 04:47:50 -0800
commit 4a65356275cda45f665dfa99d3712f6fb9087aff (patch)
tree 43a58dd347e770073d26c13fd54522e3825d957b
parent 6123265556e54891369cc79a72b670a4b23a8a41 (diff)
cluster/dht: Support for hardlink rebalance when decommissioning
Hardlink rebalance is supported only while a node is being decommissioned. It can be triggered in two ways:
1. remove-brick start
2. a normal rebalance command, if a decommissioned node value is set in the vol file

The way we handle it is:
if (nlink > 1)
do
    * if the src file doesn't have the linkto xattr
    *     mark src's linkto to the dst
    * else
    *     perform a link on the dst
    * do a lookup
    * if nlinks == dst.nlinks
    *     migrate data
    * else
    *     continue crawling
done

Signed-off-by: shishir gowda <shishirng@gluster.com>
Change-Id: If43b5524b872fd1413e9f7aa7f436cb244e30d8d
BUG: 763844
Reviewed-on: http://review.gluster.com/2737
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
-rw-r--r--libglusterfs/src/syncop.c32
-rw-r--r--libglusterfs/src/syncop.h2
-rw-r--r--libglusterfs/src/xlator.c3
-rw-r--r--xlators/cluster/dht/src/dht-common.c8
-rw-r--r--xlators/cluster/dht/src/dht-common.h13
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c155
-rw-r--r--xlators/cluster/dht/src/dht.c1
-rw-r--r--xlators/storage/posix/src/posix.c1
8 files changed, 199 insertions, 16 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index 7b6f78248ef..712e5b1f239 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -938,6 +938,38 @@ syncop_unlink (xlator_t *subvol, loc_t *loc)
}
int
+syncop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent)
+{ /* Callback for the link fop: copies op_ret/op_errno into the syncargs, calls __wake() on it, returns 0. */
+ struct syncargs *args = NULL;
+ /* cookie is used as a struct syncargs pointer (presumably the one handed to SYNCOP -- confirm against the macro). */
+ args = cookie;
+ /* Only the status pair is recorded; frame, this, inode, buf, preparent and postparent are not used. */
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc)
+{ /* Issues the link fop of subvol through SYNCOP with syncop_link_cbk as callback; returns the op_ret that callback recorded. */
+ struct syncargs args = {0, };
+ /* oldloc and newloc are forwarded unchanged as the arguments of subvol->fops->link. */
+ SYNCOP (subvol, (&args), syncop_link_cbk, subvol->fops->link,
+ oldloc, newloc);
+ /* errno is overwritten unconditionally with the recorded op_errno, whatever op_ret is. */
+ errno = args.op_errno;
+
+ return args.op_ret;
+}
+
+int
syncop_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
struct iatt *postbuf)
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
index 627fb619703..7d8a2cb0230 100644
--- a/libglusterfs/src/syncop.h
+++ b/libglusterfs/src/syncop.h
@@ -217,5 +217,5 @@ int syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict);
int syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size);
int syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
dict_t *dict);
-
+int syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);
#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 428357633af..01475f5a1c3 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -567,7 +567,8 @@ loc_copy (loc_t *dst, loc_t *src)
if (src->parent)
dst->parent = inode_ref (src->parent);
- dst->path = gf_strdup (src->path);
+ if (src->path)
+ dst->path = gf_strdup (src->path);
if (!dst->path)
goto out;
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index f76dba40fcf..9f364d10c03 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -2145,7 +2145,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
data_t *tmp = NULL;
uint32_t dir_spread = 0;
char value[4096] = {0,};
- int forced_rebalance = 0;
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
int call_cnt = 0;
data_pair_t *trav = NULL;
@@ -2214,7 +2214,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(ie, 'target' subvolume given there, etc) */
memcpy (value, tmp->data, tmp->len);
if (strcmp (value, "force") == 0)
- forced_rebalance = 1;
+ forced_rebalance =
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
if (!local->rebalance.target_node) {
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index e44c947ddf2..2ccdea747cd 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -245,6 +245,7 @@ struct dht_conf {
/* to keep track of nodes which are decomissioned */
xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
/* defrag related */
gf_defrag_info_t *defrag;
@@ -262,6 +263,12 @@ struct dht_disk_layout {
};
typedef struct dht_disk_layout dht_disk_layout_t;
+typedef enum { /* Migration mode: chosen in dht_setxattr and passed to dht_migrate_file as its flag argument. */
+ GF_DHT_MIGRATE_DATA, /* initial value in dht_setxattr; the only mode for which dht_migrate_file calls __dht_check_free_space */
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS, /* selected in dht_setxattr when the xattr value is "force" */
+ GF_DHT_MIGRATE_HARDLINK, /* selected in dht_setxattr when conf->decommission_in_progress is set; routes nlink > 1 files to gf_defrag_handle_hardlink */
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS /* passed by gf_defrag_handle_hardlink; __is_file_migratable returns 0 for it before the nlink check */
+} gf_dht_migrate_data_type_t;
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
@@ -655,4 +662,10 @@ gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
void*
gf_defrag_start (void *this);
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf);
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 46fc8773eff..ecf664bb7c4 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -28,7 +28,6 @@
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
#define DHT_REBALANCE_BLKSIZE (128 * 1024)
-#define DHT_MIGRATE_EVEN_IF_LINK_EXISTS 1
static int
dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
@@ -99,8 +98,114 @@ out:
}
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf)
+{ /* Performs one step of migrating the hardlinked file at loc; returns 0 on success, non-zero on failure. */
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ /* GF_VALIDATE_OR_GOTO presumably jumps to out (ret still -1) when its argument is NULL -- macro not shown here. */
+ GF_VALIDATE_OR_GOTO ("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this, out);
+ GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
+ /* Both the parent gfid and the file gfid must be set in loc; otherwise return with ret == -1. */
+ if (uuid_is_null (loc->pargfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+ /* Source of the move: the subvolume dht_subvol_get_cached() returns for loc->inode. */
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+ /* Default destination: the subvolume dht_subvol_get_hashed() returns for loc. */
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
+ cached_subvol->name, hashed_subvol->name);
+ data = dict_get (xattrs, DHT_LINKFILE_KEY);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str (xattrs, DHT_LINKFILE_KEY,
+ hashed_subvol->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "linkto xattr in dict for %s", loc->name);
+ goto out;
+ }
+
+ ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
+ "failed %s -> %s (%s)", cached_subvol->name,
+ loc->name, strerror (errno));
+ goto out;
+ }
+ goto out; /* ret is 0 here: this call only set the linkto xattr on the source; no link or data migration is done */
+ } else {
+ linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
+ if (!linkto_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "linkto subvol for %s", loc->name); /* not fatal: hashed_subvol is still used below */
+ } else {
+ hashed_subvol = linkto_subvol; /* destination becomes the subvolume resolved from the linkto xattr */
+ }
+ /* The same loc is passed as both the old and the new location of the link fop. */
+ ret = syncop_link (hashed_subvol, loc, loc);
+ if (ret) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
+ " failed on subvol %s (%s)", loc->name,
+ uuid_utoa(loc->gfid),
+ hashed_subvol->name, strerror (op_errno));
+ if (op_errno != EEXIST) /* EEXIST is tolerated: execution continues with the lookup below */
+ goto out;
+ }
+ }
+ ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
+ , loc->name, hashed_subvol->name, strerror (errno));
+ goto out;
+ }
+ /* Data moves only when the link count just looked up on the destination equals the one in stbuf; otherwise return 0 without migrating. */
+ if (iatt.ia_nlink == stbuf->ia_nlink) {
+ ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+
static inline int
-__is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf)
+__is_file_migratable (xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, dict_t *xattrs, int flags)
{
int ret = -1;
@@ -111,11 +216,25 @@ __is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf)
goto out;
}
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ goto out;
+ }
if (stbuf->ia_nlink > 1) {
- /* TODO : support migrating hardlinks */
- gf_log (this->name, GF_LOG_WARNING, "%s: file has hardlinks",
- loc->path);
- ret = -ENOTSUP;
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ ret = gf_defrag_handle_hardlink (this, loc,
+ xattrs, stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file has hardlinks", loc->path);
+ }
+ ret = ENOTSUP;
goto out;
}
@@ -504,6 +623,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
fd_t *dst_fd = NULL;
dict_t *dict = NULL;
dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
int file_has_holes = 0;
gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
@@ -513,19 +633,29 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (!dict)
goto out;
+ ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
/* Phase 1 - Data migration is in progress from now on */
- ret = syncop_lookup (from, loc, NULL, &stbuf, NULL, NULL);
+ ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
loc->path, from->name, strerror (errno));
goto out;
}
+ /* we no more require this key */
+ dict_del (dict, DHT_LINKFILE_KEY);
+
/* preserve source mode, so set the same to the destination */
src_ia_prot = stbuf.ia_prot;
/* Check if file can be migrated */
- ret = __is_file_migratable (this, loc, &stbuf);
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
if (ret)
goto out;
@@ -543,7 +673,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
/* Should happen on all files when 'force' option is not given */
- if (flag != DHT_MIGRATE_EVEN_IF_LINK_EXISTS) {
+ if (flag == GF_DHT_MIGRATE_DATA) {
ret = __dht_check_free_space (to, from, loc, &stbuf);
if (ret) {
goto out;
@@ -714,6 +844,8 @@ out:
if (xattr)
dict_unref (xattr);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
if (dst_fd)
syncop_close (dst_fd);
@@ -909,6 +1041,8 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
struct iatt iatt = {0,};
int32_t op_errno = 0;
+ gf_log (this->name, GF_LOG_INFO, "migate data called on %s",
+ loc->path);
fd = fd_create (loc->inode, defrag->pid);
if (!fd) {
gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
@@ -950,8 +1084,6 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
continue;
defrag->num_files_lookedup++;
- if (entry->d_stat.ia_nlink > 1)
- continue;
loc_wipe (&entry_loc);
ret =dht_build_child_loc (this, &entry_loc, loc,
@@ -1223,7 +1355,6 @@ gf_defrag_start_crawl (void *data)
defrag = conf->defrag;
if (!defrag)
goto out;
-
dht_build_root_inode (this, &defrag->root_inode);
if (!defrag->root_inode)
goto out;
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 816bf868e88..bb6f8c09901 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -284,6 +284,7 @@ dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
}
ret = 0;
+ conf->decommission_in_progress = 1;
out:
if (dup_brick)
GF_FREE (dup_brick);
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 9e003a76a39..5849626b03c 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -756,6 +756,7 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
internal call from distribute for creating 'linkfile', and that
linkfile may be for a hardlinked file */
if (dict_get (params, GLUSTERFS_INTERNAL_FOP_KEY)) {
+ dict_del (params, GLUSTERFS_INTERNAL_FOP_KEY);
op_ret = dict_get_ptr (params, "gfid-req", &uuid_req);
if (op_ret) {
gf_log (this->name, GF_LOG_DEBUG,