summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVenkatesh Somyajulu <vsomyaju@redhat.com>2014-09-10 22:52:43 +0530
committerNiels de Vos <ndevos@redhat.com>2014-09-17 08:34:54 -0700
commite24e812d25f10c008b80d72bc8b1fb2c401bd892 (patch)
tree6b53e21275b5e94b720222aa9bad65c27124d057
parent21342c76f123df72d6e78fdca437e92e5d6a6841 (diff)
cluster/dht: Modified logic of linkto file deletion on non-hashed
Currently whenever dht_lookup_everywhere gets called, if in dht_lookup_everywhere_cbk, a linkto file is found on non-hashed subvolume, file is unlinked. But there are cases when this file is under migration. Under such condition, we should avoid deletion of file. When some other rebalance process changes the layout of parent such that dst_file (w.r.t. migration) falls on non-hashed node, then may be lookup could have found it as linkto file but just before unlink, file is under migration or already migrated In such cased unlink can be avoided. Race: ------- If we have two bricks (brick-1 and brick-2) with initial file "a" under BaseDir which is hashed as well as cached on (brick-1). Assume "a" hashing gives 44. Brick-1 Brick-2 Initial Setup: BaseDir/a BaseDir [1-50] [51-100] Now add new-brick Brick-3. 1. Rebalance-1 on node Node-1 (Brick-1 node) will reset the BaseDir Layout. 2. After that it will perform a) Create linkto file on new-hashed (brick-2) b) Perform file migration. 1.Rebalance-1 Fixes the base-layout: Brick-1 Brick-2 Brick-3 --------- ---------- ------------ BaseDir/a BaseDir BaseDir [1-33] [34-66] [67-100] 2. Only a) is BaseDir/a BaseDir/a(linkto) BaseDir performed Create linktofile Now rebalance 2 on node-2 jumped in and it will perform step 1 and 2-a. After (rebal-2, step-1), it changes the layout of the BaseDir. BaseDir/a BaseDir/a(link) BaseDir [67-100] [1-33] [34-66] For (rebale-2, step-2), It will perform lookup at Brick-3 as w.r.t new layout 44 falls for brick-3. But lookup will fail. So dht_lookup_everywhere gets called. NOTE: On brick-2 by rebalance-1, a linkto file was created. Currently that linkto files gets deleted by rebalance-2 lookup as it is considered as stale linkto file. But with patch if rebalance is already in progress or rebalance is over, linkto file will not be unlinked. If rebalance is in progress fd will be open and if rebalance is over then linkto file wont be set. Change-Id: I3fee0d28de3c76197325536a9e30099d2413f079 BUG: 1129541 Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com> Reviewed-on: http://review.gluster.org/8345 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com> (cherry picked from commit 966997992bdbd5fffc632bf705678e287ed50bf7) Signed-off-by: Nithya Balachandran <nbalacha@redhat.com> Reviewed-on: http://review.gluster.org/8719 Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r--libglusterfs/src/glusterfs.h3
-rw-r--r--xlators/cluster/dht/src/dht-common.c92
-rw-r--r--xlators/cluster/dht/src/dht-common.h2
-rw-r--r--xlators/storage/posix/src/posix.c4
4 files changed, 78 insertions, 23 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 9aa3817228a..2e5d93f84ca 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -183,6 +183,9 @@
== DHT_LINKFILE_MODE)
#define DHT_LINKFILE_STR "linkto"
+#define DHT_SKIP_NON_LINKTO_UNLINK "unlink-only-if-dht-linkto-file"
+#define DHT_SKIP_OPEN_FD_UNLINK "dont-unlink-for-open-fd"
+
/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
typedef enum {
GF_FOP_NULL = 0,
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ad635af95b9..2bb3c51487a 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -789,7 +789,16 @@ dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
- int this_call_cnt = 0;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
+
+ local = (dht_local_t*)frame->local;
+ path = local->loc.path;
+
+ gf_log (this->name, GF_LOG_INFO, "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno,
+ ((path == NULL)? "null" : path ));
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
@@ -818,6 +827,28 @@ dht_lookup_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie,
return 0;
}
+int
+dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict) {
+
+ int ret = 0;
+
+ ret = dict_set_int32 (dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
+
+ if (ret)
+ goto err;
+
+ ret = dict_set_int32 (dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+
+ if (ret)
+ goto err;
+
+
+ return 0;
+
+err:
+ return -1;
+
+}
/* Rebalance is performed from cached_node to hashed_node. Initial cached_node
* contains a non-linkto file. After migration it is converted to linkto and
* then unlinked. And at hashed_subvolume, first a linkto file is present,
@@ -893,7 +924,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
if (local->skip_unlink.handle_valid_link && hashed_subvol) {
- /*Purpose of "unlink-only-if-dht-linkto-file":
+ /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK":
* If this lookup is performed by rebalance and this
* rebalance process detected hashed file and by
* the time it sends the lookup request to cached node,
@@ -907,21 +938,10 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
* linkto file and not a migrated_file.
*/
- ret = dict_set_int32 (local->xattr_req,
- "unlink-only-if-dht-linkto-file",
- 1);
- if (ret)
- goto dict_err;
-
- /*Later other consumers can also use this key to avoid
- * unlinking in case of open_fd
- */
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
+ (local->xattr_req);
- ret = dict_set_int32 (local->xattr_req,
- "dont-unlink-for-open-fd", 1);
-
-dict_err:
if (ret) {
/* If for some reason, setting key in the dict
* fails, return with ENOENT, as with respect to
@@ -1151,6 +1171,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
int32_t fd_count = 0;
dht_conf_t *conf = NULL;
+ dict_t *dict_req = {0};
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -1260,12 +1281,41 @@ unlock:
buf->ia_gfid);
} else if (!ret && (fd_count == 0)) {
- gf_log (this->name, GF_LOG_INFO,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc, 0, NULL);
- return 0;
+
+ dict_req = dict_new ();
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file
+ (dict_req);
+
+ if (ret) {
+
+ /* Skip unlinking for dict_failure
+ *File is found as a linkto file on non-hashed,
+ *subvolume. In the current implementation,
+ *finding a linkto-file on non-hashed does not
+ *always implies that it is stale. So deletion
+ *of file should be done only when both fd is
+ *closed and linkto-xattr is set. In case of
+ *dict_set failure, avoid skipping of file.
+ *NOTE: dht_frame_return should get called for
+ * this block.
+ */
+
+ dict_unref (dict_req);
+
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "attempting deletion of stale linkfile "
+ "%s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_lookup_unlink_cbk,
+ subvol, subvol->fops->unlink, loc,
+ 0, dict_req);
+
+ dict_unref (dict_req);
+
+ return 0;
+ }
}
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index bc7ee19e241..b61600d88ea 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -804,4 +804,6 @@ dht_subvol_status (dht_conf_t *conf, xlator_t *subvol);
int
dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this);
+int
+dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict);
#endif/* _DHT_H */
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index cedf71ef4de..549a2267d73 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1407,7 +1407,7 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
- op_ret = dict_get_int32 (xdata, "dont-unlink-for-open-fd",
+ op_ret = dict_get_int32 (xdata, DHT_SKIP_OPEN_FD_UNLINK,
&check_open_fd);
if (!op_ret && check_open_fd) {
@@ -1428,7 +1428,7 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
}
- op_ret = dict_get_int32 (xdata, "unlink-only-if-dht-linkto-file",
+ op_ret = dict_get_int32 (xdata, DHT_SKIP_NON_LINKTO_UNLINK,
&unlink_if_linkto);
if (!op_ret && unlink_if_linkto) {