summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/bugs/md-cache/bug-1211863.t60
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c6
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h10
-rw-r--r--xlators/performance/md-cache/src/md-cache.c258
4 files changed, 304 insertions, 30 deletions
diff --git a/tests/bugs/md-cache/bug-1211863.t b/tests/bugs/md-cache/bug-1211863.t
index b969fbb4b29..4fa9e33d7a1 100644
--- a/tests/bugs/md-cache/bug-1211863.t
+++ b/tests/bugs/md-cache/bug-1211863.t
@@ -5,18 +5,64 @@
cleanup;
-TEST glusterd
+## 1. Start glusterd
+TEST glusterd;
-TEST $CLI volume create $V0 $H0:$B0/$V0
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Start the volume
TEST $CLI volume start $V0
-TEST $CLI volume set $V0 cache-samba-metadata on
-EXPECT 'on' volinfo_field $V0 'performance.cache-samba-metadata'
+## 4. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
-TEST $CLI volume set $V0 cache-samba-metadata off
-EXPECT 'off' volinfo_field $V0 'performance.cache-samba-metadata'
+## 6. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 8. Create a file
+TEST touch $M0/file1
+
+## 9. Setxattr from mount-0
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+## 10. Getxattr from mount-1, this should return the correct value as it is a fresh getxattr
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 11. Now modify the same xattr from mount-0 again
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+## 12. Since the xattr is already cached in mount-1 it returns the old xattr
+ #value, until the timeout (600)
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 13. Unmount to clean all the cache
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+
+## 18. Restart the volume to restart the bick process
TEST $CLI volume stop $V0
-TEST $CLI volume delete $V0
+TEST $CLI volume start $V0
+
+## 20. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 22. Repeat the tests 11-14, but this time since cache invalidation is on,
+ #the getxattr will reflect the new value
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+sleep 2; #There can be a very very small window where the next getxattr
+ #reaches md-cache, before the cache-invalidation caused by previous
+ #setxattr, reaches md-cache. Hence sleeping for 2 sec.
+ #Also it should not be > 600.
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz"
+
+TEST $CLI volume set $V0 cache-samba-metadata off
+EXPECT 'off' volinfo_field $V0 'performance.cache-samba-metadata'
cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 2e6aa378097..3e12c4f5fb0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1967,6 +1967,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = 2,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "features.cache-invalidation",
+ .voltype = "performance/md-cache",
+ .option = "cache-invalidation",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
/* Feature translators */
{ .key = "features.uss",
diff --git a/xlators/performance/md-cache/src/md-cache-messages.h b/xlators/performance/md-cache/src/md-cache-messages.h
index 2fe8d457a9a..a4259bacf1b 100644
--- a/xlators/performance/md-cache/src/md-cache-messages.h
+++ b/xlators/performance/md-cache/src/md-cache-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_MD_CACHE_BASE GLFS_MSGID_COMP_MD_CACHE
-#define GLFS_MD_CACHE_NUM_MESSAGES 1
+#define GLFS_MD_CACHE_NUM_MESSAGES 2
#define GLFS_MSGID_END (GLFS_MD_CACHE_BASE + GLFS_MD_CACHE_NUM_MESSAGES + 1)
/* Messages with message IDs */
@@ -58,6 +58,14 @@
#define MD_CACHE_MSG_NO_MEMORY (GLFS_MD_CACHE_BASE + 1)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define MD_CACHE_MSG_DISCARD_UPDATE (GLFS_MD_CACHE_BASE + 2)
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 0a4d3c78ead..57a1a0d1a4e 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -16,6 +16,8 @@
#include "md-cache-mem-types.h"
#include "compat-errno.h"
#include "glusterfs-acl.h"
+#include "defaults.h"
+#include "upcall-utils.h"
#include <assert.h>
#include <sys/time.h>
#include "md-cache-messages.h"
@@ -34,6 +36,9 @@ struct mdc_conf {
gf_boolean_t force_readdirp;
gf_boolean_t cache_swift_metadata;
gf_boolean_t cache_samba_metadata;
+ gf_boolean_t mdc_invalidation;
+ time_t last_child_down;
+ gf_lock_t lock;
};
@@ -312,20 +317,58 @@ unlock:
}
+/* Cache is valid if:
+ * - It is not cached before any brick was down. Brick down case is handled by
+ * invalidating all the cache when any brick went down.
+ * - The cache time is not expired
+ */
+static gf_boolean_t
+__is_cache_valid (xlator_t *this, time_t mdc_time)
+{
+ time_t now = 0;
+ gf_boolean_t ret = _gf_true;
+ struct mdc_conf *conf = NULL;
+ int timeout = 0;
+ time_t last_child_down = 0;
+
+ conf = this->private;
+
+ /* conf->lock here is not taken deliberately, so that the multi
+ * threaded IO doesn't contend on a global lock. While updating
+ * the variable, the lock is taken, so that atleast the writes are
+ * intact. The read of last_child_down may return junk, but that
+ * is for a very short period of time.
+ */
+ last_child_down = conf->last_child_down;
+ timeout = conf->timeout;
+
+ time (&now);
+
+ if ((mdc_time == 0) ||
+ ((last_child_down != 0) && (mdc_time < last_child_down))) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ if (now >= (mdc_time + timeout)) {
+ ret = _gf_false;
+ }
+
+out:
+ return ret;
+}
+
+
static gf_boolean_t
is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc)
{
- struct mdc_conf *conf = NULL;
- time_t now = 0;
gf_boolean_t ret = _gf_true;
- conf = this->private;
-
- time (&now);
LOCK (&mdc->lock);
{
- if (now >= (mdc->ia_time + conf->timeout))
- ret = _gf_false;
+ ret = __is_cache_valid (this, mdc->ia_time);
+ if (ret == _gf_false)
+ mdc->ia_time = 0;
}
UNLOCK (&mdc->lock);
@@ -336,18 +379,13 @@ is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc)
static gf_boolean_t
is_md_cache_xatt_valid (xlator_t *this, struct md_cache *mdc)
{
- struct mdc_conf *conf = NULL;
- time_t now = 0;
gf_boolean_t ret = _gf_true;
- conf = this->private;
-
- time (&now);
-
LOCK (&mdc->lock);
{
- if (now >= (mdc->xa_time + conf->timeout))
- ret = _gf_false;
+ ret = __is_cache_valid (this, mdc->xa_time);
+ if (ret == _gf_false)
+ mdc->xa_time = 0;
}
UNLOCK (&mdc->lock);
@@ -397,12 +435,14 @@ int
mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
struct iatt *iatt)
{
- int ret = -1;
+ int ret = 0;
struct md_cache *mdc = NULL;
mdc = mdc_inode_prep (this, inode);
- if (!mdc)
+ if (!mdc) {
+ ret = -1;
goto out;
+ }
LOCK (&mdc->lock);
{
@@ -411,6 +451,33 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
goto unlock;
}
+ /* There could be a race in invalidation, where the
+ * invalidations in order A, B reaches md-cache in the order
+ * B, A. Hence, make sure the invalidation A is discarded if
+ * it comes after B. ctime of a file is always in ascending
+ * order unlike atime and mtime(which can be changed by user
+ * to any date), also ctime gets updates when atime/mtime
+ * changes, hence check for ctime only.
+ */
+ if (mdc->md_ctime > iatt->ia_ctime) {
+ gf_msg_callingfn (this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request");
+ ret = -1;
+ goto unlock;
+
+ }
+ if ((mdc->md_ctime == iatt->ia_ctime) &&
+ (mdc->md_ctime_nsec > iatt->ia_ctime_nsec)) {
+ gf_msg_callingfn (this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request(ctime_nsec)");
+ ret = -1;
+ goto unlock;
+ }
+
/*
* Invalidate the inode if the mtime or ctime has changed
* and the prebuf doesn't match the value we have cached.
@@ -434,7 +501,7 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
}
unlock:
UNLOCK (&mdc->lock);
- ret = 0;
+
out:
return ret;
}
@@ -2284,15 +2351,81 @@ mdc_key_load_set (struct mdc_key *keys, char *pattern, gf_boolean_t val)
return 0;
}
+struct set {
+ inode_t *inode;
+ xlator_t *this;
+};
+
+static int
+mdc_inval_xatt (dict_t *d, char *k, data_t *v, void *tmp)
+{
+ struct set *tmp1 = NULL;
+ int ret = 0;
+
+ tmp1 = (struct set *)tmp;
+ ret = mdc_inode_xatt_unset (tmp1->this, tmp1->inode, k);
+ return ret;
+}
+
+static int
+mdc_invalidate (xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+ struct set tmp = {0, };
+ inode_table_t *itable = NULL;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find (itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ if (up_ci->flags & IATT_UPDATE_FLAGS) {
+ ret = mdc_inode_iatt_set_validate (this, inode, NULL,
+ &up_ci->stat);
+ /* one of the scenarios where ret < 0 is when this invalidate
+ * is older than the current stat, in that case do not
+ * update the xattrs as well
+ */
+ if (ret < 0)
+ goto out;
+ }
+ if (up_ci->flags & UP_XATTR) {
+ ret = mdc_inode_xatt_update (this, inode, up_ci->dict);
+ } else if (up_ci->flags & UP_XATTR_RM) {
+ tmp.inode = inode;
+ tmp.this = this;
+ ret = dict_foreach (up_ci->dict, mdc_inval_xatt, &tmp);
+ }
+
+out:
+ if (inode)
+ inode_unref (inode);
+
+ return ret;
+}
+
int
reconfigure (xlator_t *this, dict_t *options)
{
struct mdc_conf *conf = NULL;
+ int timeout = 0;
conf = this->private;
- GF_OPTION_RECONF ("md-cache-timeout", conf->timeout, options, int32, out);
+ GF_OPTION_RECONF ("md-cache-timeout", timeout, options, int32, out);
GF_OPTION_RECONF ("cache-selinux", conf->cache_selinux, options, bool, out);
mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
@@ -2314,7 +2447,19 @@ reconfigure (xlator_t *this, dict_t *options)
conf->cache_samba_metadata);
GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool, out);
-
+ GF_OPTION_RECONF("cache-invalidation", conf->mdc_invalidation, options,
+ bool, out);
+
+ /* If timeout is greater than 60s (default before the patch that added
+ * cache invalidation support was added) then, cache invalidation
+ * feature for md-cache needs to be enabled, if not set timeout to the
+ * previous max which is 60s
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
out:
return 0;
}
@@ -2332,6 +2477,7 @@ int
init (xlator_t *this)
{
struct mdc_conf *conf = NULL;
+ int timeout = 0;
conf = GF_CALLOC (sizeof (*conf), 1, gf_mdc_mt_mdc_conf_t);
if (!conf) {
@@ -2340,7 +2486,7 @@ init (xlator_t *this)
return -1;
}
- GF_OPTION_INIT ("md-cache-timeout", conf->timeout, int32, out);
+ GF_OPTION_INIT ("md-cache-timeout", timeout, int32, out);
GF_OPTION_INIT ("cache-selinux", conf->cache_selinux, bool, out);
mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
@@ -2362,6 +2508,22 @@ init (xlator_t *this)
conf->cache_samba_metadata);
GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+ GF_OPTION_INIT("cache-invalidation", conf->mdc_invalidation, bool, out);
+
+ LOCK_INIT (&conf->lock);
+ time (&conf->last_child_down);
+
+ /* If timeout is greater than 60s (default before the patch that added
+ * cache invalidation support was added) then, cache invalidation
+ * feature for md-cache needs to be enabled, if not set timeout to the
+ * previous max which is 60s
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
out:
this->private = conf;
@@ -2370,6 +2532,52 @@ out:
void
+mdc_update_child_down_time (xlator_t *this, time_t *now)
+{
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ conf->last_child_down = *now;
+ }
+ UNLOCK (&conf->lock);
+}
+
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ struct mdc_conf *conf = NULL;
+ time_t now = 0;
+
+ conf = this->private;
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_CHILD_DOWN:
+ case GF_EVENT_CHILD_MODIFIED:
+ time (&now);
+ mdc_update_child_down_time (this, &now);
+ ret = default_notify (this, event, data);
+ break;
+ case GF_EVENT_UPCALL:
+ if (conf->mdc_invalidation)
+ ret = mdc_invalidate (this, data);
+ if (default_notify (this, event, data) != 0)
+ ret = -1;
+ break;
+ default:
+ ret = default_notify (this, event, data);
+ break;
+ }
+
+ return ret;
+}
+
+
+void
fini (xlator_t *this)
{
return;
@@ -2437,7 +2645,7 @@ struct volume_options options[] = {
{ .key = {"md-cache-timeout"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
- .max = 60,
+ .max = 600,
.default_value = "1",
.description = "Time period after which cache has to be refreshed",
},
@@ -2447,5 +2655,11 @@ struct volume_options options[] = {
.description = "Convert all readdir requests to readdirplus to "
"collect stat info on each entry.",
},
+ { .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "When \"on\", invalidates/updates the metadata cache "
+ "on receiving of the cache-invalidation notifications",
+ },
{ .key = {NULL} },
};