summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2012-06-13 12:08:38 -0400
committerAnand Avati <avati@redhat.com>2012-07-13 09:46:09 -0700
commit32ffb79f18cbaebcbe6bba51599ca234f44675cc (patch)
tree3f7589078e618cf0f575e5ad03a67afecb50d006
parentca4900497142127c31d0dba7a53a921200aaf790 (diff)
fuse/md-cache: add support for the 'fopen-keep-cache' mount option
fopen-keep-cache disables unconditional page-cache invalidations on file open in fuse (via FOPEN_KEEP_CACHE) and replaces that behavior with detection of remote changes and explicit invalidations from mount/fuse. This option improves local caching through the page cache and native client. This change defines a new 'invalidate' translator callback to identify when an inode's cache mapping has been determined to be invalid. md-cache implements the policy to detect and invoke inode invalidations. fuse-bridge and io-cache implement invalidate handlers to invalidate the respective caches (page cache in the case of fuse). BUG: 833564 Change-Id: I99818da5777eaf06276c1c0b194669f5bab92d48 Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-on: http://review.gluster.com/3584 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r--glusterfsd/src/glusterfsd.c19
-rw-r--r--glusterfsd/src/glusterfsd.h1
-rw-r--r--libglusterfs/src/glusterfs.h1
-rw-r--r--libglusterfs/src/inode.c48
-rw-r--r--libglusterfs/src/inode.h3
-rw-r--r--libglusterfs/src/xlator.h3
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c113
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h1
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in5
-rw-r--r--xlators/performance/io-cache/src/io-cache.c15
-rw-r--r--xlators/performance/md-cache/src/md-cache.c32
11 files changed, 219 insertions, 22 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 0ab8fcd..76f0194 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -168,6 +168,8 @@ static struct argp_option gf_options[] = {
"Brick name to be registered with Gluster portmapper" },
{"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
"Brick Port to be registered with Gluster portmapper" },
+ {"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, 0, 0,
+ "Do not purge the cache on file open"},
{0, 0, 0, 0, "Fuse options:"},
{"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
@@ -368,6 +370,17 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
}
+ if (cmd_args->fopen_keep_cache) {
+ ret = dict_set_static_ptr(master->options, "fopen-keep-cache",
+ "on");
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key "
+ "fopen-keep-cache");
+ goto err;
+ }
+ }
+
switch (cmd_args->fuse_direct_io_mode) {
case GF_OPTION_DISABLE: /* disable */
ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT,
@@ -814,7 +827,11 @@ parse_opts (int key, char *arg, struct argp_state *state)
ctx = glusterfs_ctx_get ();
ctx->mem_accounting = 1;
break;
- }
+
+ case ARGP_FOPEN_KEEP_CACHE_KEY:
+ cmd_args->fopen_keep_cache = 1;
+ break;
+ }
return 0;
}
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 8ec1219..382a8cc 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -87,6 +87,7 @@ enum argp_option_keys {
ARGP_USER_MAP_ROOT_KEY = 156,
ARGP_MEM_ACCOUNTING_KEY = 157,
ARGP_SELINUX_KEY = 158,
+ ARGP_FOPEN_KEEP_CACHE_KEY = 159,
};
struct _gfd_vol_top_priv_t {
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 780353d..357284e 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -285,6 +285,7 @@ struct _cmd_args {
int selinux;
int worm;
int mac_compat;
+ int fopen_keep_cache;
struct list_head xlator_options; /* list of xlator_option_t */
/* fuse options */
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index e32eddb..a0088c0 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -950,6 +950,54 @@ inode_forget (inode_t *inode, uint64_t nlookup)
return 0;
}
+/*
+ * Invalidate an inode. This is invoked when a translator decides that an inode's
+ * cache is no longer valid. Any translator interested in taking action in this
+ * situation can define the invalidate callback.
+ */
+int
+inode_invalidate(inode_t *inode)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ if (!inode) {
+ gf_log_callingfn(THIS->name, GF_LOG_WARNING, "inode not found");
+ return -1;
+ }
+
+ /*
+ * The master xlator is not in the graph but it can define an invalidate
+ * handler.
+ */
+ xl = inode->table->xl->ctx->master;
+ if (xl && xl->cbks->invalidate) {
+ old_THIS = THIS;
+ THIS = xl;
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+ if (ret)
+ return ret;
+ }
+
+ xl = inode->table->xl->graph->first;
+ while (xl) {
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->invalidate)
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+
+ if (ret)
+ break;
+
+ xl = xl->next;
+ }
+
+ return ret;
+}
+
static void
__inode_unlink (inode_t *inode, inode_t *parent, const char *name)
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
index 41003df..20e28f6 100644
--- a/libglusterfs/src/inode.h
+++ b/libglusterfs/src/inode.h
@@ -131,6 +131,9 @@ int
inode_forget (inode_t *inode, uint64_t nlookup);
int
+inode_invalidate(inode_t *inode);
+
+int
inode_rename (inode_table_t *table, inode_t *olddir, const char *oldname,
inode_t *newdir, const char *newname,
inode_t *inode, struct iatt *stbuf);
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 2fce7dc..5162d20 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -730,10 +730,13 @@ typedef int32_t (*cbk_forget_t) (xlator_t *this,
typedef int32_t (*cbk_release_t) (xlator_t *this,
fd_t *fd);
+typedef int32_t (*cbk_invalidate_t)(xlator_t *this, inode_t *inode);
+
struct xlator_cbks {
cbk_forget_t forget;
cbk_release_t release;
cbk_release_t releasedir;
+ cbk_invalidate_t invalidate;
};
typedef int32_t (*dumpop_priv_t) (xlator_t *this);
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 3b4c6c6..21e14ef 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -25,6 +25,34 @@ static int gf_fuse_xattr_enotsup_log;
void fini (xlator_t *this_xl);
+static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
+
+/*
+ * Send an invalidate notification up to fuse to purge the file from local
+ * page cache.
+ */
+static int32_t
+fuse_invalidate(xlator_t *this, inode_t *inode)
+{
+ fuse_private_t *priv = this->private;
+ uint64_t nodeid;
+
+ /*
+ * NOTE: We only invalidate at the moment if fopen_keep_cache is
+ * enabled because otherwise this is a departure from default
+ * behavior. Specifically, the performance/write-behind xlator
+ * causes unconditional invalidations on write requests.
+ */
+ if (!priv->fopen_keep_cache)
+ return 0;
+
+ nodeid = inode_to_fuse_nodeid(inode);
+ gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %lu.", nodeid);
+ fuse_invalidate_inode(this, nodeid);
+
+ return 0;
+}
+
fuse_fd_ctx_t *
__fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd)
{
@@ -161,7 +189,7 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
static void
-fuse_invalidate (xlator_t *this, uint64_t fuse_ino)
+fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)
{
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_entry_out *fnieo = NULL;
@@ -207,6 +235,47 @@ fuse_invalidate (xlator_t *this, uint64_t fuse_ino)
}
}
+/*
+ * Send an inval inode notification to fuse. This causes an invalidation of the
+ * entire page cache mapping on the inode.
+ */
+static void
+fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+{
+ struct fuse_out_header *fouh = NULL;
+ struct fuse_notify_inval_inode_out *fniio = NULL;
+ fuse_private_t *priv = NULL;
+ int rv = 0;
+ char inval_buf[INVAL_BUF_SIZE] = {0};
+
+ fouh = (struct fuse_out_header *) inval_buf;
+ fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1);
+
+ priv = this->private;
+
+ if (priv->revchan_out < 0)
+ return;
+
+ fouh->unique = 0;
+ fouh->error = FUSE_NOTIFY_INVAL_INODE;
+ fouh->len = sizeof(struct fuse_out_header) +
+ sizeof(struct fuse_notify_inval_inode_out);
+
+ /* inval the entire mapping until we learn how to be more granular */
+ fniio->ino = fuse_ino;
+ fniio->off = 0;
+ fniio->len = -1;
+
+ rv = write(priv->revchan_out, inval_buf, fouh->len);
+ if (rv != fouh->len) {
+ gf_log("glusterfs-fuse", GF_LOG_ERROR, "kernel notification "
+ "daemon defunct");
+ close(priv->fd);
+ }
+
+ gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %lu", fuse_ino);
+}
+
int
send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
{
@@ -670,17 +739,27 @@ fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|| (priv->direct_io_mode == 1))
foo.open_flags |= FOPEN_DIRECT_IO;
#ifdef GF_DARWIN_HOST_OS
- /* In Linux: by default, buffer cache
- * is purged upon open, setting
- * FOPEN_KEEP_CACHE implies no-purge
- *
- * In MacFUSE: by default, buffer cache
- * is left intact upon open, setting
- * FOPEN_PURGE_UBC implies purge
- *
- * [[Interesting...]]
- */
- foo.open_flags |= FOPEN_PURGE_UBC;
+ /* In Linux: by default, buffer cache
+ * is purged upon open, setting
+ * FOPEN_KEEP_CACHE implies no-purge
+ *
+ * In MacFUSE: by default, buffer cache
+ * is left intact upon open, setting
+ * FOPEN_PURGE_UBC implies purge
+ *
+ * [[Interesting...]]
+ */
+ if (!priv->fopen_keep_cache)
+ foo.open_flags |= FOPEN_PURGE_UBC;
+#else
+ /*
+ * If fopen-keep-cache is enabled, we set the associated
+ * flag here such that files are not invalidated on open.
+ * File invalidations occur either in fuse or explicitly
+ * when the cache is set invalid on the inode.
+ */
+ if (priv->fopen_keep_cache)
+ foo.open_flags |= FOPEN_KEEP_CACHE;
#endif
}
@@ -2663,7 +2742,7 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
gf_log ("fuse", GF_LOG_TRACE,
"got request to invalidate %"PRIu64, finh->nodeid);
send_fuse_err (this, finh, 0);
- fuse_invalidate (this, finh->nodeid);
+ fuse_invalidate_entry (this, finh->nodeid);
GF_FREE (finh);
return;
}
@@ -4523,6 +4602,9 @@ init (xlator_t *this_xl)
GF_ASSERT (ret == 0);
}
+ GF_OPTION_INIT("fopen-keep-cache", priv->fopen_keep_cache, bool,
+ cleanup_exit);
+
cmd_args = &this_xl->ctx->cmd_args;
fsname = cmd_args->volfile;
if (!fsname && cmd_args->volfile_server) {
@@ -4644,6 +4726,7 @@ struct xlator_fops fops = {
};
struct xlator_cbks cbks = {
+ .invalidate = fuse_invalidate,
};
@@ -4683,5 +4766,9 @@ struct volume_options options[] = {
{ .key = {"read-only"},
.type = GF_OPTION_TYPE_BOOL
},
+ { .key = {"fopen-keep-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index c6c8438..dcd9629 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -109,6 +109,7 @@ struct fuse_private {
gf_boolean_t acl;
gf_boolean_t selinux;
gf_boolean_t read_only;
+ gf_boolean_t fopen_keep_cache;
fdtable_t *fdtable;
/* For fuse-reverse-validation */
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index ee95688..b623d34 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -132,6 +132,10 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout");
fi
+ if [ -n "$fopen_keep_cache" ]; then
+ cmd_line=$(echo "$cmd_line --fopen-keep-cache");
+ fi
+
# for rdma volume, we have to fetch volfile with '.rdma' added
# to volume name, so that it fetches the right client vol file
volume_id_rdma="";
@@ -297,6 +301,7 @@ main ()
"acl") acl=1 ;;
"selinux") selinux=1 ;;
"worm") worm=1 ;;
+ "fopen-keep-cache") fopen_keep_cache=1 ;;
# "mount -t glusterfs" sends this, but it's useless.
"rw") ;;
*)
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index 85e8765..bdaf0f1 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -313,6 +313,18 @@ ioc_forget (xlator_t *this, inode_t *inode)
return 0;
}
+static int32_t
+ioc_invalidate(xlator_t *this, inode_t *inode)
+{
+ ioc_inode_t *ioc_inode = NULL;
+
+ inode_ctx_get(inode, this, (uint64_t *) &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush(ioc_inode);
+
+ return 0;
+}
/*
* ioc_cache_validate_cbk -
@@ -1977,7 +1989,8 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
.forget = ioc_forget,
- .release = ioc_release
+ .release = ioc_release,
+ .invalidate = ioc_invalidate,
};
struct volume_options options[] = {
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 9acffba..cf1aee9 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -378,7 +378,8 @@ mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt)
int
-mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt)
+mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
+ struct iatt *iatt)
{
int ret = -1;
struct md_cache *mdc = NULL;
@@ -394,6 +395,19 @@ mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt)
goto unlock;
}
+ /*
+ * Invalidate the inode if the mtime or ctime has changed
+ * and the prebuf doesn't match the value we have cached.
+ * TODO: writev returns with a NULL iatt due to
+ * performance/write-behind, causing invalidation on writes.
+ */
+ if (IA_ISREG(inode->ia_type) &&
+ ((iatt->ia_mtime != mdc->md_mtime) ||
+ (iatt->ia_ctime != mdc->md_ctime)))
+ if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) ||
+ (prebuf->ia_mtime != mdc->md_mtime))
+ inode_invalidate(inode);
+
mdc_from_iatt (mdc, iatt);
time (&mdc->ia_time);
@@ -405,6 +419,10 @@ out:
return ret;
}
+int mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+ return mdc_inode_iatt_set_validate(this, inode, NULL, iatt);
+}
int
mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt)
@@ -859,7 +877,7 @@ mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->loc.inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf,
@@ -901,7 +919,7 @@ mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1377,7 +1395,7 @@ mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1422,7 +1440,7 @@ mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->loc.inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1464,7 +1482,7 @@ mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1506,7 +1524,7 @@ mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,