summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
author Jeff Darcy <jdarcy@redhat.com> 2016-03-31 17:15:37 -0400
committer Jeff Darcy <jdarcy@redhat.com> 2016-04-07 08:07:55 -0700
commit 6602376e3e9e6d9f4f695475569322b61ccc2411 (patch)
tree 58d4352c641b43615cccce363ce9f764e4cdd8eb /xlators
parent e6c7da3769105f0e6fc8b6627f3b11727a2a216d (diff)
dht: add "nuke" functionality for efficient server-side deletion
This turns a special xattr into an rmdir with flags set. When that hits the posix translator on the server side, it causes the file/directory to be moved into the special "landfill" directory. From there, the posix janitor thread will take care of deleting it entirely on the server side - traversing it recursively if necessary.

A couple of secondary issues were fixed to make this effective.

* FUSE now ensures that setxattr values are NUL terminated.
* The janitor thread now gets woken up immediately when something is placed in 'landfill' instead of only when file descriptors need to be closed.
* The default landfill-emptying interval was reduced to 10s.

To use the feature, issue a setxattr something like this:

setfattr -n glusterfs.dht.nuke -v "" /mnt/glusterfs/vol/some_dir

The value doesn't actually matter; the mere receipt of a request with this key is sufficient. Some day it might be useful to allow setting a required value as a sort of password, so that only those who know it can access the underlying special functionality.

Change-Id: I8a343c2cdb40a76d5a06c707191fb67babb8514f
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/13878
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- xlators/cluster/dht/src/dht-common.c 45
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.c 9
-rw-r--r-- xlators/storage/posix/src/posix.c 21
3 files changed, 63 insertions, 12 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 4c93084ec82..b14f20bcc65 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -3731,6 +3731,42 @@ err:
return 0;
}
+int
+dht_nuke_dir (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
+{ /* Turns a "glusterfs.dht.nuke" setxattr on a directory into an rmdir wound with flags = 1; 'tmp' is never read here. */
+ if (!IA_ISDIR(loc->inode->ia_type)) { /* anything that is not a directory is refused */
+ DHT_STACK_UNWIND (setxattr, frame, -1, ENOTSUP, NULL);
+ return 0;
+ }
+
+ /* Setxattr didn't need the parent, but rmdir does. */
+ loc->parent = inode_parent (loc->inode, NULL, NULL);
+ if (!loc->parent) { /* no parent inode could be found: fail the setxattr with ENOENT */
+ DHT_STACK_UNWIND (setxattr, frame, -1, ENOENT, NULL);
+ return 0;
+ }
+ gf_uuid_copy (loc->pargfid, loc->parent->gfid);
+ /* No name supplied: point it at whatever follows the last '/' in the path. */
+ if (!loc->name && loc->path) {
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name) {
+ ++(loc->name); /* step past the '/' itself */
+ }
+ }
+
+ /*
+ * We do this instead of calling dht_rmdir_do directly for two reasons.
+ * The first is that we want to reuse all of the initialization that
+ * dht_rmdir does, so if it ever changes we'll just follow along. The
+ * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't
+ * obscure the fact that we came in via this path instead of a genuine
+ * rmdir. That makes debugging just a tiny bit easier.
+ */
+ STACK_WIND (frame, default_rmdir_cbk, this, this->fops->rmdir,
+ loc, 1, NULL); /* the 1 is rmdir's flags argument; non-zero flags make dht_rmdir call dht_rmdir_do right away */
+
+ return 0; /* always 0; success or failure is delivered through the unwind/wind calls above */
+}
int
dht_setxattr (call_frame_t *frame, xlator_t *this,
@@ -3955,6 +3991,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
+ tmp = dict_get (xattr, "glusterfs.dht.nuke");
+ if (tmp) {
+ return dht_nuke_dir (frame, this, loc, tmp);
+ }
+
if (IA_ISDIR (loc->inode->ia_type)) {
for (i = 0; i < call_cnt; i++) {
@@ -7645,6 +7686,10 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
goto err;
}
+ if (flags) {
+ return dht_rmdir_do (frame, this);
+ }
+
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (frame, dht_rmdir_opendir_cbk,
conf->subvolumes[i],
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index c6c38f3145b..aca1b3d14fd 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -3292,7 +3292,14 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
}
if (fsi->size > 0) {
- dict_value = memdup (value, fsi->size);
+ /*
+ * Many translators expect setxattr values to be strings, but
+ * neither dict_get_str nor data_to_str do any checking or
+ * fixups to make sure that's the case. To avoid nasty
+ * surprises, allocate an extra byte and add a NUL here.
+ */
+ dict_value = memdup (value, fsi->size+1);
+ dict_value[fsi->size] = '\0';
}
dict_set (state->xattr, newkey,
data_from_dynptr ((void *)dict_value, fsi->size));
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index b5df1d082ee..f72c13d9066 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1994,6 +1994,7 @@ posix_rmdir (call_frame_t *frame, xlator_t *this,
} else {
sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str);
op_ret = sys_rename (real_path, tmp_path);
+ pthread_cond_signal (&priv->janitor_cond);
}
} else {
op_ret = sys_rmdir (real_path);
@@ -6528,7 +6529,6 @@ init (xlator_t *this)
int ret = 0;
int op_ret = -1;
ssize_t size = -1;
- int32_t janitor_sleep = 0;
uuid_t old_uuid = {0,};
uuid_t dict_uuid = {0,};
uuid_t gfid = {0,};
@@ -6857,16 +6857,9 @@ init (xlator_t *this)
}
ret = 0;
- _private->janitor_sleep_duration = 600;
+ GF_OPTION_INIT ("janitor-sleep-duration",
+ _private->janitor_sleep_duration, int32, out);
- dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration",
- &janitor_sleep);
- if (dict_ret == 0) {
- gf_msg_debug (this->name, 0, "Setting janitor sleep duration "
- "to %d.", janitor_sleep);
-
- _private->janitor_sleep_duration = janitor_sleep;
- }
/* performing open dir on brick dir locks the brick dir
* and prevents it from being unmounted
*/
@@ -7105,7 +7098,13 @@ struct volume_options options[] = {
{ .key = {"background-unlink"},
.type = GF_OPTION_TYPE_BOOL },
{ .key = {"janitor-sleep-duration"},
- .type = GF_OPTION_TYPE_INT },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .default_value = "10",
+ .description = "Interval (in seconds) between times the internal "
+ "'landfill' directory is emptied."
+ },
{ .key = {"volume-id"},
.type = GF_OPTION_TYPE_ANY },
{ .key = {"glusterd-uuid"},