diff options
| author | Brian Foster <bfoster@redhat.com> | 2012-06-13 12:08:38 -0400 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2012-07-13 09:46:09 -0700 | 
| commit | 32ffb79f18cbaebcbe6bba51599ca234f44675cc (patch) | |
| tree | 3f7589078e618cf0f575e5ad03a67afecb50d006 | |
| parent | ca4900497142127c31d0dba7a53a921200aaf790 (diff) | |
fuse/md-cache: add support for the 'fopen-keep-cache' mount option
The fopen-keep-cache option disables the unconditional page-cache
invalidation that fuse performs on file open (by setting the
FOPEN_KEEP_CACHE open flag) and replaces that behavior with detection
of remote changes and explicit invalidations sent from mount/fuse. This
option improves local caching through the page cache and the native client.
This change defines a new 'invalidate' translator callback that is
invoked when an inode's cache mapping has been determined to be
invalid. md-cache implements the policy that detects stale inodes and
triggers inode invalidation. fuse-bridge and io-cache implement
invalidate handlers that invalidate their respective caches (the page
cache in the case of fuse).
BUG: 833564
Change-Id: I99818da5777eaf06276c1c0b194669f5bab92d48
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-on: http://review.gluster.com/3584
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
| -rw-r--r-- | glusterfsd/src/glusterfsd.c | 19 | ||||
| -rw-r--r-- | glusterfsd/src/glusterfsd.h | 1 | ||||
| -rw-r--r-- | libglusterfs/src/glusterfs.h | 1 | ||||
| -rw-r--r-- | libglusterfs/src/inode.c | 48 | ||||
| -rw-r--r-- | libglusterfs/src/inode.h | 3 | ||||
| -rw-r--r-- | libglusterfs/src/xlator.h | 3 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 113 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 1 | ||||
| -rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 5 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 15 | ||||
| -rw-r--r-- | xlators/performance/md-cache/src/md-cache.c | 32 | 
11 files changed, 219 insertions, 22 deletions
| diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 0ab8fcd4c6d..76f01948193 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -168,6 +168,8 @@ static struct argp_option gf_options[] = {           "Brick name to be registered with Gluster portmapper" },          {"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,           "Brick Port to be registered with Gluster portmapper" }, +	{"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, 0, 0, +	 "Do not purge the cache on file open"},          {0, 0, 0, 0, "Fuse options:"},          {"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL", OPTION_ARG_OPTIONAL, @@ -368,6 +370,17 @@ create_fuse_mount (glusterfs_ctx_t *ctx)                  }          } +	if (cmd_args->fopen_keep_cache) { +		ret = dict_set_static_ptr(master->options, "fopen-keep-cache", +			"on"); +		if (ret < 0) { +			gf_log("glusterfsd", GF_LOG_ERROR, +				"failed to set dict value for key " +				"fopen-keep-cache"); +			goto err; +		} +	} +          switch (cmd_args->fuse_direct_io_mode) {          case GF_OPTION_DISABLE: /* disable */                  ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT, @@ -814,7 +827,11 @@ parse_opts (int key, char *arg, struct argp_state *state)                  ctx = glusterfs_ctx_get ();                  ctx->mem_accounting = 1;                  break; -        } + +	case ARGP_FOPEN_KEEP_CACHE_KEY: +		cmd_args->fopen_keep_cache = 1; +		break; +	}          return 0;  } diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 8ec121954bb..382a8cc71fc 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -87,6 +87,7 @@ enum argp_option_keys {          ARGP_USER_MAP_ROOT_KEY            = 156,          ARGP_MEM_ACCOUNTING_KEY           = 157,          ARGP_SELINUX_KEY                  = 158, +	ARGP_FOPEN_KEEP_CACHE_KEY	  = 159,  };  struct _gfd_vol_top_priv_t { diff --git a/libglusterfs/src/glusterfs.h 
b/libglusterfs/src/glusterfs.h index 780353d29a8..357284e27b4 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -285,6 +285,7 @@ struct _cmd_args {          int              selinux;          int              worm;          int              mac_compat; +	int		 fopen_keep_cache;  	struct list_head xlator_options;  /* list of xlator_option_t */  	/* fuse options */ diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index e32eddb5d1c..a0088c03c55 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -950,6 +950,54 @@ inode_forget (inode_t *inode, uint64_t nlookup)          return 0;  } +/* + * Invalidate an inode. This is invoked when a translator decides that an inode's + * cache is no longer valid. Any translator interested in taking action in this + * situation can define the invalidate callback. + */ +int +inode_invalidate(inode_t *inode) +{ +	int ret = 0; +	xlator_t *xl = NULL; +	xlator_t *old_THIS = NULL; + +	if (!inode) { +		gf_log_callingfn(THIS->name, GF_LOG_WARNING, "inode not found"); +		return -1; +	} + +	/* +	 * The master xlator is not in the graph but it can define an invalidate +	 * handler. 
+	 */ +	xl = inode->table->xl->ctx->master; +	if (xl && xl->cbks->invalidate) { +		old_THIS = THIS; +		THIS = xl; +		ret = xl->cbks->invalidate(xl, inode); +		THIS = old_THIS; +		if (ret) +			return ret; +	} + +	xl = inode->table->xl->graph->first; +	while (xl) { +		old_THIS = THIS; +		THIS = xl; +		if (xl->cbks->invalidate) +			ret = xl->cbks->invalidate(xl, inode); +		THIS = old_THIS; + +		if (ret) +			break; + +		xl = xl->next; +	} + +	return ret; +} +  static void  __inode_unlink (inode_t *inode, inode_t *parent, const char *name) diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index 41003df71ca..20e28f6820d 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -131,6 +131,9 @@ int  inode_forget (inode_t *inode, uint64_t nlookup);  int +inode_invalidate(inode_t *inode); + +int  inode_rename (inode_table_t *table, inode_t *olddir, const char *oldname,  	      inode_t *newdir, const char *newname,  	      inode_t *inode, struct iatt *stbuf); diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 2fce7dc474a..5162d20e500 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -730,10 +730,13 @@ typedef int32_t (*cbk_forget_t) (xlator_t *this,  typedef int32_t (*cbk_release_t) (xlator_t *this,                                    fd_t *fd); +typedef int32_t (*cbk_invalidate_t)(xlator_t *this, inode_t *inode); +  struct xlator_cbks {          cbk_forget_t    forget;          cbk_release_t   release;          cbk_release_t   releasedir; +	cbk_invalidate_t invalidate;  };  typedef int32_t (*dumpop_priv_t) (xlator_t *this); diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3b4c6c68c97..21e14efb384 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -25,6 +25,34 @@ static int gf_fuse_xattr_enotsup_log;  void fini (xlator_t *this_xl); +static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + +/* + * 
Send an invalidate notification up to fuse to purge the file from local + * page cache. + */ +static int32_t +fuse_invalidate(xlator_t *this, inode_t *inode) +{ +	fuse_private_t *priv = this->private; +	uint64_t nodeid; + +	/* +	 * NOTE: We only invalidate at the moment if fopen_keep_cache is +	 * enabled because otherwise this is a departure from default +	 * behavior. Specifically, the performance/write-behind xlator +	 * causes unconditional invalidations on write requests. +	 */ +	if (!priv->fopen_keep_cache) +		return 0; + +	nodeid = inode_to_fuse_nodeid(inode); +	gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %lu.", nodeid); +	fuse_invalidate_inode(this, nodeid); + +	return 0; +} +  fuse_fd_ctx_t *  __fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd)  { @@ -161,7 +189,7 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)  static void -fuse_invalidate (xlator_t *this, uint64_t fuse_ino) +fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)  {          struct fuse_out_header             *fouh   = NULL;          struct fuse_notify_inval_entry_out *fnieo  = NULL; @@ -207,6 +235,47 @@ fuse_invalidate (xlator_t *this, uint64_t fuse_ino)          }  } +/* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. 
+ */ +static void +fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) +{ +	struct fuse_out_header *fouh = NULL; +	struct fuse_notify_inval_inode_out *fniio = NULL; +	fuse_private_t *priv = NULL; +	int rv = 0; +	char inval_buf[INVAL_BUF_SIZE] = {0}; + +	fouh = (struct fuse_out_header *) inval_buf; +	fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1); + +	priv = this->private; + +	if (priv->revchan_out < 0) +		return; + +	fouh->unique = 0; +	fouh->error = FUSE_NOTIFY_INVAL_INODE; +	fouh->len = sizeof(struct fuse_out_header) + +		sizeof(struct fuse_notify_inval_inode_out); + +	/* inval the entire mapping until we learn how to be more granular */ +	fniio->ino = fuse_ino; +	fniio->off = 0; +	fniio->len = -1; + +	rv = write(priv->revchan_out, inval_buf, fouh->len); +	if (rv != fouh->len) { +		gf_log("glusterfs-fuse", GF_LOG_ERROR, "kernel notification " +			"daemon defunct"); +		close(priv->fd); +	} + +	gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %lu", fuse_ino); +} +  int  send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)  { @@ -670,17 +739,27 @@ fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                              || (priv->direct_io_mode == 1))                                  foo.open_flags |= FOPEN_DIRECT_IO;  #ifdef GF_DARWIN_HOST_OS -                                /* In Linux: by default, buffer cache -                                 * is purged upon open, setting -                                 * FOPEN_KEEP_CACHE implies no-purge -                                 * -                                 * In MacFUSE: by default, buffer cache -                                 * is left intact upon open, setting -                                 * FOPEN_PURGE_UBC implies purge -                                 * -                                 * [[Interesting...]] -                                 */ -                                foo.open_flags |= FOPEN_PURGE_UBC; +                        /* In 
Linux: by default, buffer cache +                         * is purged upon open, setting +                         * FOPEN_KEEP_CACHE implies no-purge +                         * +                         * In MacFUSE: by default, buffer cache +                         * is left intact upon open, setting +                         * FOPEN_PURGE_UBC implies purge +                         * +                         * [[Interesting...]] +                         */ +			if (!priv->fopen_keep_cache) +				foo.open_flags |= FOPEN_PURGE_UBC; +#else +			/* +			 * If fopen-keep-cache is enabled, we set the associated +			 * flag here such that files are not invalidated on open. +			 * File invalidations occur either in fuse or explicitly +			 * when the cache is set invalid on the inode. +			 */ +			if (priv->fopen_keep_cache) +				foo.open_flags |= FOPEN_KEEP_CACHE;  #endif                  } @@ -2663,7 +2742,7 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)                  gf_log ("fuse", GF_LOG_TRACE,                          "got request to invalidate %"PRIu64, finh->nodeid);                  send_fuse_err (this, finh, 0); -                fuse_invalidate (this, finh->nodeid); +                fuse_invalidate_entry (this, finh->nodeid);                  GF_FREE (finh);                  return;          } @@ -4523,6 +4602,9 @@ init (xlator_t *this_xl)                  GF_ASSERT (ret == 0);          } +	GF_OPTION_INIT("fopen-keep-cache", priv->fopen_keep_cache, bool, +		cleanup_exit); +          cmd_args = &this_xl->ctx->cmd_args;          fsname = cmd_args->volfile;          if (!fsname && cmd_args->volfile_server) { @@ -4644,6 +4726,7 @@ struct xlator_fops fops = {  };  struct xlator_cbks cbks = { +	.invalidate = fuse_invalidate,  }; @@ -4683,5 +4766,9 @@ struct volume_options options[] = {          { .key = {"read-only"},            .type = GF_OPTION_TYPE_BOOL          }, +	{ .key = {"fopen-keep-cache"}, +	  .type = GF_OPTION_TYPE_BOOL, +	  
.default_value = "false" +	},          { .key = {NULL} },  }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index c6c8438a9ed..dcd9629246c 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -109,6 +109,7 @@ struct fuse_private {          gf_boolean_t         acl;          gf_boolean_t         selinux;          gf_boolean_t         read_only; +	gf_boolean_t	     fopen_keep_cache;          fdtable_t           *fdtable;          /* For fuse-reverse-validation */ diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index ee956885c4c..b623d3428b7 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -132,6 +132,10 @@ start_glusterfs ()          cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout");      fi +    if [ -n "$fopen_keep_cache" ]; then +	cmd_line=$(echo "$cmd_line --fopen-keep-cache"); +    fi +      # for rdma volume, we have to fetch volfile with '.rdma' added      # to volume name, so that it fetches the right client vol file      volume_id_rdma=""; @@ -297,6 +301,7 @@ main ()                      "acl")      acl=1 ;;                      "selinux")  selinux=1 ;;                      "worm")     worm=1 ;; +		    "fopen-keep-cache")	fopen_keep_cache=1 ;;                      # "mount -t glusterfs" sends this, but it's useless.                      
"rw")       ;;                      *) diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 85e876531a5..bdaf0f1b81a 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -313,6 +313,18 @@ ioc_forget (xlator_t *this, inode_t *inode)          return 0;  } +static int32_t +ioc_invalidate(xlator_t *this, inode_t *inode) +{ +	ioc_inode_t *ioc_inode = NULL; + +	inode_ctx_get(inode, this, (uint64_t *) &ioc_inode); + +	if (ioc_inode) +		ioc_inode_flush(ioc_inode); + +	return 0; +}  /*   * ioc_cache_validate_cbk - @@ -1977,7 +1989,8 @@ struct xlator_dumpops dumpops = {  struct xlator_cbks cbks = {          .forget      = ioc_forget, -        .release     = ioc_release +        .release     = ioc_release, +	.invalidate  = ioc_invalidate,  };  struct volume_options options[] = { diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 9acffba2a4e..cf1aee9d6b9 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -378,7 +378,8 @@ mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt)  int -mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt) +mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, +			    struct iatt *iatt)  {          int              ret = -1;          struct md_cache *mdc = NULL; @@ -394,6 +395,19 @@ mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt)                          goto unlock;                  } +		/* +		 * Invalidate the inode if the mtime or ctime has changed +		 * and the prebuf doesn't match the value we have cached. +		 * TODO: writev returns with a NULL iatt due to +		 * performance/write-behind, causing invalidation on writes. 
+		 */ +		if (IA_ISREG(inode->ia_type) && +		    ((iatt->ia_mtime != mdc->md_mtime) || +		    (iatt->ia_ctime != mdc->md_ctime))) +			if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) || +			    (prebuf->ia_mtime != mdc->md_mtime)) +				inode_invalidate(inode); +                  mdc_from_iatt (mdc, iatt);                  time (&mdc->ia_time); @@ -405,6 +419,10 @@ out:          return ret;  } +int mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt) +{ +	return mdc_inode_iatt_set_validate(this, inode, NULL, iatt); +}  int  mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt) @@ -859,7 +877,7 @@ mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (!local)                  goto out; -        mdc_inode_iatt_set (this, local->loc.inode, postbuf); +        mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);  out:          MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -901,7 +919,7 @@ mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (!local)                  goto out; -        mdc_inode_iatt_set (this, local->fd->inode, postbuf); +        mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);  out:          MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -1377,7 +1395,7 @@ mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (!local)                  goto out; -        mdc_inode_iatt_set (this, local->fd->inode, postbuf); +        mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);  out:          MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, @@ -1422,7 +1440,7 @@ mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (!local)                  goto out; -        mdc_inode_iatt_set (this, local->loc.inode, postbuf); +	mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);  out:          
MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf, @@ -1464,7 +1482,7 @@ mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (!local)                  goto out; -        mdc_inode_iatt_set (this, local->fd->inode, postbuf); +        mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);  out:          MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf, @@ -1506,7 +1524,7 @@ mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (!local)                  goto out; -        mdc_inode_iatt_set (this, local->fd->inode, postbuf); +        mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);  out:          MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, | 
