summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- doc/mount.glusterfs.8 | 4
-rw-r--r-- glusterfsd/src/glusterfsd.c | 24
-rw-r--r-- glusterfsd/src/glusterfsd.h | 1
-rw-r--r-- libglusterfs/src/glusterfs/glusterfs.h | 1
-rw-r--r-- libglusterfs/src/glusterfs/inode.h | 17
-rw-r--r-- libglusterfs/src/inode.c | 254
-rw-r--r-- libglusterfs/src/libglusterfs.sym | 2
-rw-r--r-- tests/features/fuse-lru-limit.t | 42
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.c | 127
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.h | 3
-rwxr-xr-x xlators/mount/fuse/utils/mount.glusterfs.in | 7
11 files changed, 395 insertions, 87 deletions
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 367f02d9b1a..902b0c1ee5c 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -122,6 +122,10 @@ Provide list of backup volfile servers in the following format [default: None]
\fBDeprecated\fR option - placed here for backward compatibility [default: 1]
.TP
.TP
+\fBlru-limit=\fRN
+Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072, 0 means unlimited]
+.TP
+.TP
\fBbackground-qlen=\fRN
Set fuse module's background queue length to N [default: 64]
.TP
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 6347941f369..0dea52b6af1 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -219,6 +219,9 @@ static struct argp_option gf_options[] = {
"[default: 300]"},
{"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0,
"Resolve all auxiliary groups in fuse translator (max 32 otherwise)"},
+ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+ "Set fuse module's limit for number of inodes kept in LRU list to N "
+ "[default: 0]"},
{"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
"Set fuse module's background queue length to N "
"[default: 64]"},
@@ -496,6 +499,15 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
}
}
+ if (cmd_args->lru_limit >= 0) {
+ ret = dict_set_int32(options, "lru-limit", cmd_args->lru_limit);
+ if (ret < 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
+ "lru-limit");
+ goto err;
+ }
+ }
+
if (cmd_args->background_qlen) {
ret = dict_set_int32(options, "background-qlen",
cmd_args->background_qlen);
@@ -1257,6 +1269,13 @@ parse_opts(int key, char *arg, struct argp_state *state)
cmd_args->resolve_gids = 1;
break;
+ case ARGP_FUSE_LRU_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->lru_limit))
+ break;
+
+ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
+ break;
+
case ARGP_FUSE_BACKGROUND_QLEN_KEY:
if (!gf_string2int(arg, &cmd_args->background_qlen))
break;
@@ -2085,6 +2104,11 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
}
+ /* Initialize lru_limit to a value below 0 to indicate that nothing was
+ specified. This is needed because 0 is a valid option and may not be
+ the default value. */
+ cmd_args->lru_limit = -1;
+
argp_parse(&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
if (cmd_args->print_xlatordir || cmd_args->print_statedumpdir ||
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 0042054e138..86ac61c1a92 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -109,6 +109,7 @@ enum argp_option_keys {
ARGP_ATTR_TIMES_GRANULARITY_KEY = 187,
ARGP_PRINT_LIBEXECDIR_KEY = 188,
ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
+ ARGP_FUSE_LRU_LIMIT_KEY = 190,
};
struct _gfd_vol_top_priv {
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index 908a0ce774f..9f14f2f5440 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -524,6 +524,7 @@ struct _cmd_args {
pid_t client_pid;
int client_pid_set;
unsigned uid_map_root;
+ int32_t lru_limit;
int background_qlen;
int congestion_threshold;
char *fuse_mountopts;
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
index 5934373ec5b..52efdd85ccc 100644
--- a/libglusterfs/src/glusterfs/inode.h
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -54,6 +54,13 @@ struct _inode_table {
struct mem_pool *dentry_pool; /* memory pool for dentrys */
struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
int ctxcount; /* number of slots in inode->ctx */
+
+ /* This is required for 'invalidation' when 'nlookup' would be used,
+ especially in case of fuse-bridge */
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *);
+ xlator_t *invalidator_xl;
+ struct list_head invalidate; /* inodes which are in invalidation queue */
+ uint32_t invalidate_size; /* count of inodes in invalidation list */
};
struct _dentry {
@@ -100,6 +107,7 @@ struct _inode {
struct list_head list; /* active/lru/purge */
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+ bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
};
#define UUID0_STR "00000000-0000-0000-0000-000000000000"
@@ -107,7 +115,12 @@ struct _inode {
#define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
inode_table_t *
-inode_table_new(size_t lru_limit, xlator_t *xl);
+inode_table_new(uint32_t lru_limit, xlator_t *xl);
+
+inode_table_t *
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl);
void
inode_table_destroy_all(glusterfs_ctx_t *ctx);
@@ -139,6 +152,8 @@ inode_lookup(inode_t *inode);
int
inode_forget(inode_t *inode, uint64_t nlookup);
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup);
int
inode_ref_reduce_by_n(inode_t *inode, uint64_t nref);
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 3bf32cfe442..b4a62897498 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -23,6 +23,100 @@
move latest accessed dentry to list_head of inode
*/
+// clang-format off
+/*
+
+Details as per Xavi:
+
+ I think we should have 3 lists: active, lru and invalidate.
+
+We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of
+refs, invalidate_sent flag and moving from one list to another must be done
+atomically.
+
+With this information, these are the states that cause a transition:
+
+ refs nlookups inv_sent op
+ 1 0 0 unref -> refs = 0, active--->destroy
+ 1 1 0 unref -> refs = 0, active--->lru
+ 1 1 0 forget -> nlookups = 0, active--->active
+ *0 1 0 forget -> nlookups = 0, lru--->destroy
+ *0 1 1 forget -> nlookups = 0, invalidate--->destroy
+ 0 1 0 ref -> refs = 1, lru--->active
+ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active
+ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate
+ 1 1 1 unref -> refs = 0, invalidate--->invalidate
+ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active
+
+(*) technically these combinations cannot happen because a forget sent by the
+kernel first calls ref() and then unref(). However it's equivalent.
+
+overflow means that lru list has grown beyond the limit and the inode needs to
+be invalidated. All other combinations do not cause a change in state or are not
+possible.
+
+Based on this, the code could be similar to this:
+
+ ref(inode, inv)
+ {
+ if (refs == 0) {
+ if (inv_sent) {
+ invalidate_count--;
+ inv_sent = 0;
+ } else {
+ lru_count--;
+ }
+ if (inv) {
+ inv_sent = 1;
+ invalidate_count++;
+ list_move(inode, invalidate);
+ } else {
+ active_count++;
+ list_move(inode, active);
+ }
+ }
+ refs++;
+ }
+
+ unref(inode, clear)
+ {
+ if (clear && inv_sent) {
+ // there is a case of fuse itself sending forget, without
+ // invalidate, after entry delete, like unlink(), rmdir().
+ inv_sent = 0;
+ invalidate_count--;
+ active_count++;
+ list_move(inode, active);
+ }
+ refs--;
+ if ((refs == 0) && !inv_sent) {
+ active_count--;
+ if (nlookups == 0) {
+ destroy(inode);
+ } else {
+ lru_count++;
+ list_move(inode, lru);
+ }
+ }
+ }
+
+ forget(inode)
+ {
+ ref(inode, false);
+ nlookups--;
+ unref(inode, true);
+ }
+
+ overflow(inode)
+ {
+ ref(inode, true);
+ invalidator(inode);
+ unref(inode, false);
+ }
+
+*/
+// clang-format on
+
#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
{ \
int i = 1; \
@@ -37,7 +131,7 @@
}
static inode_t *
-__inode_unref(inode_t *inode);
+__inode_unref(inode_t *inode, bool clear);
static int
inode_table_prune(inode_table_t *table);
@@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry)
dentry->name = NULL;
if (dentry->parent) {
- __inode_unref(dentry->parent);
+ __inode_unref(dentry->parent, false);
dentry->parent = NULL;
}
@@ -446,7 +540,7 @@ out:
}
static inode_t *
-__inode_unref(inode_t *inode)
+__inode_unref(inode_t *inode, bool clear)
{
int index = 0;
xlator_t *this = NULL;
@@ -455,8 +549,6 @@ __inode_unref(inode_t *inode)
if (!inode)
return NULL;
- this = THIS;
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops.
@@ -464,6 +556,13 @@ __inode_unref(inode_t *inode)
if (__is_root_gfid(inode->gfid))
return inode;
+ this = THIS;
+
+ if (clear && inode->invalidate_sent) {
+ inode->invalidate_sent = false;
+ inode->table->invalidate_size--;
+ __inode_activate(inode);
+ }
GF_ASSERT(inode->ref);
--inode->ref;
@@ -474,7 +573,7 @@ __inode_unref(inode_t *inode)
inode->_ctx[index].ref--;
}
- if (!inode->ref) {
+ if (!inode->ref && !inode->invalidate_sent) {
inode->table->active_size--;
nlookup = GF_ATOMIC_GET(inode->nlookup);
@@ -488,7 +587,7 @@ __inode_unref(inode_t *inode)
}
static inode_t *
-__inode_ref(inode_t *inode)
+__inode_ref(inode_t *inode, bool is_invalidate)
{
int index = 0;
xlator_t *this = NULL;
@@ -498,11 +597,6 @@ __inode_ref(inode_t *inode)
this = THIS;
- if (!inode->ref) {
- inode->table->lru_size--;
- __inode_activate(inode);
- }
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops. If we do not allow unrefs but allow refs,
@@ -514,6 +608,22 @@ __inode_ref(inode_t *inode)
if (__is_root_gfid(inode->gfid) && inode->ref)
return inode;
+ if (!inode->ref) {
+ if (inode->invalidate_sent) {
+ inode->invalidate_sent = false;
+ inode->table->invalidate_size--;
+ } else {
+ inode->table->lru_size--;
+ }
+ if (is_invalidate) {
+ inode->invalidate_sent = true;
+ inode->table->invalidate_size++;
+ list_move_tail(&inode->list, &inode->table->invalidate);
+ } else {
+ __inode_activate(inode);
+ }
+ }
+
inode->ref++;
index = __inode_get_xl_index(inode, this);
@@ -537,7 +647,7 @@ inode_unref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_unref(inode);
+ inode = __inode_unref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -558,7 +668,7 @@ inode_ref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_ref(inode);
+ inode = __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -592,7 +702,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)
}
if (parent)
- newd->parent = __inode_ref(parent);
+ newd->parent = __inode_ref(parent, false);
list_add(&newd->inode_list, &inode->dentry_list);
newd->inode = inode;
@@ -662,7 +772,7 @@ inode_new(inode_table_t *table)
{
inode = __inode_create(table);
if (inode != NULL) {
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
}
pthread_mutex_unlock(&table->lock);
@@ -769,7 +879,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
inode = dentry->inode;
if (inode)
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -912,7 +1022,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
{
inode = __inode_find(table, gfid);
if (inode)
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1057,7 +1167,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
linked_inode = __inode_link(inode, parent, name, iatt);
if (linked_inode)
- __inode_ref(linked_inode);
+ __inode_ref(linked_inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1124,6 +1234,31 @@ inode_forget(inode_t *inode, uint64_t nlookup)
return 0;
}
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup)
+{
+ inode_table_t *table = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
+
+ table = inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ inode_forget_atomic(inode, nlookup);
+ __inode_unref(inode, true);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table);
+
+ return 0;
+}
+
/*
* Invalidate an inode. This is invoked when a translator decides that an
* inode's cache is no longer valid. Any translator interested in taking action
@@ -1298,7 +1433,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
parent = dentry->parent;
if (parent)
- __inode_ref(parent);
+ __inode_ref(parent, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1480,6 +1615,8 @@ inode_table_prune(inode_table_t *table)
inode_t *del = NULL;
inode_t *tmp = NULL;
inode_t *entry = NULL;
+ uint64_t nlookup = 0;
+ int64_t lru_size = 0;
if (!table)
return -1;
@@ -1488,7 +1625,11 @@ inode_table_prune(inode_table_t *table)
pthread_mutex_lock(&table->lock);
{
- while (table->lru_limit && table->lru_size > (table->lru_limit)) {
+ if (!table->lru_limit)
+ goto purge_list;
+
+ lru_size = table->lru_size;
+ while (lru_size > (table->lru_limit)) {
if (list_empty(&table->lru)) {
gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
LG_MSG_INVALID_INODE_LIST,
@@ -1498,26 +1639,46 @@ inode_table_prune(inode_table_t *table)
break;
}
+ lru_size--;
entry = list_entry(table->lru.next, inode_t, list);
+ /* The logic of invalidation is required only if invalidator_fn
+ is present */
+ if (table->invalidator_fn) {
+ /* check for valid inode with 'nlookup' */
+ nlookup = GF_ATOMIC_GET(entry->nlookup);
+ if (nlookup) {
+ __inode_ref(entry, true);
+ tmp = entry;
+ break;
+ }
+ }
table->lru_size--;
__inode_retire(entry);
-
ret++;
}
+ purge_list:
list_splice_init(&table->purge, &purge);
table->purge_size = 0;
}
pthread_mutex_unlock(&table->lock);
+ /* Pick 1 inode for invalidation */
+ if (tmp) {
+ xlator_t *old_THIS = THIS;
+ THIS = table->invalidator_xl;
+ table->invalidator_fn(table->invalidator_xl, tmp);
+ THIS = old_THIS;
+ inode_unref(tmp);
+ }
+
+ /* Destroy any inodes that were spliced off the purge list above */
+ list_for_each_entry_safe(del, tmp, &purge, list)
{
- list_for_each_entry_safe(del, tmp, &purge, list)
- {
- list_del_init(&del->list);
- inode_forget_atomic(del, 0);
- __inode_destroy(del);
- }
+ list_del_init(&del->list);
+ inode_forget_atomic(del, 0);
+ __inode_destroy(del);
}
return ret;
@@ -1545,9 +1706,12 @@ __inode_table_init_root(inode_table_t *table)
}
inode_table_t *
-inode_table_new(size_t lru_limit, xlator_t *xl)
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl)
{
inode_table_t *new = NULL;
+ uint32_t mem_pool_size = lru_limit;
int ret = -1;
int i = 0;
@@ -1559,20 +1723,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
new->ctxcount = xl->graph->xl_count + 1;
new->lru_limit = lru_limit;
+ new->invalidator_fn = invalidator_fn;
+ new->invalidator_xl = invalidator_xl;
new->hashsize = 14057; /* TODO: Random Number?? */
/* In case FUSE is initing the inode table. */
- if (lru_limit == 0)
- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
-
- new->inode_pool = mem_pool_new(inode_t, lru_limit);
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
+ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES;
+ new->inode_pool = mem_pool_new(inode_t, mem_pool_size);
if (!new->inode_pool)
goto out;
- new->dentry_pool = mem_pool_new(dentry_t, lru_limit);
-
+ new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);
if (!new->dentry_pool)
goto out;
@@ -1604,6 +1768,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
INIT_LIST_HEAD(&new->active);
INIT_LIST_HEAD(&new->lru);
INIT_LIST_HEAD(&new->purge);
+ INIT_LIST_HEAD(&new->invalidate);
ret = gf_asprintf(&new->name, "%s/inode", xl->name);
if (-1 == ret) {
@@ -1633,6 +1798,13 @@ out:
return new;
}
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl)
+{
+ /* Only fuse for now requires the inode table with invalidator */
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+}
+
int
inode_table_ctx_free(inode_table_t *table)
{
@@ -1771,6 +1943,14 @@ inode_table_destroy(inode_table_t *inode_table)
inode_table->lru_size--;
}
+ /* Same logic for invalidate list */
+ while (!list_empty(&inode_table->invalidate)) {
+ trav = list_first_entry(&inode_table->invalidate, inode_t, list);
+ inode_forget_atomic(trav, 0);
+ __inode_retire(trav);
+ inode_table->invalidate_size--;
+ }
+
while (!list_empty(&inode_table->active)) {
trav = list_first_entry(&inode_table->active, inode_t, list);
/* forget and unref the inode to retire and add it to
@@ -2280,6 +2460,7 @@ inode_dump(inode_t *inode, char *prefix)
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);
gf_proc_dump_write("ref", "%u", inode->ref);
+ gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
if (inode->_ctx) {
inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx),
@@ -2353,10 +2534,13 @@ inode_table_dump(inode_table_t *itable, char *prefix)
gf_proc_dump_write(key, "%d", itable->lru_size);
gf_proc_dump_build_key(key, prefix, "purge_size");
gf_proc_dump_write(key, "%d", itable->purge_size);
+ gf_proc_dump_build_key(key, prefix, "invalidate_size");
+ gf_proc_dump_write(key, "%d", itable->invalidate_size);
INODE_DUMP_LIST(&itable->active, key, prefix, "active");
INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");
pthread_mutex_unlock(&itable->lock);
}
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 6ca6a639456..464493d6cfc 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -791,6 +791,7 @@ __inode_find
inode_find
inode_find_directory_name
inode_forget
+inode_forget_with_unref
inode_from_path
inode_grep
inode_grep_for_gfid
@@ -815,6 +816,7 @@ inode_table_destroy_all
inode_table_dump
inode_table_dump_to_dict
inode_table_new
+inode_table_with_invalidator
__inode_table_set_lru_limit
inode_table_set_lru_limit
inode_unlink
diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t
new file mode 100644
index 00000000000..9f1211660ce
--- /dev/null
+++ b/tests/features/fuse-lru-limit.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+mkdir ${M0}/dir-{1..9}
+for i in {1..9}; do
+ for j in {1..1000}; do
+ echo "Test file" > ${M0}/dir-$i/file-$j;
+ done;
+done
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be 9000+
+TEST [ $lc -ge 9000 ]
+
+TEST umount $M0
+
+TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0
+
+TEST find $M0
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be <1000
+# Not sure if there are any possibilities of buffer need.
+TEST [ $lc -le 1000 ]
+
+TEST rm -rf $M0/*
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+cleanup
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 3f4e19c211e..5bc070658e2 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -216,8 +216,8 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
struct fuse_out_header *fouh = NULL;
if (res == -1) {
- gf_log("glusterfs-fuse", GF_LOG_ERROR,
- "writing to fuse device failed: %s", strerror(errno));
+ gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR,
+ "writing to fuse device failed: %s", strerror(errno));
return errno;
}
@@ -312,29 +312,29 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
#define send_fuse_obj(this, finh, obj) \
send_fuse_data(this, finh, obj, sizeof(*(obj)))
-#if FUSE_KERNEL_MINOR_VERSION >= 11
static void
fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
{
+#if FUSE_KERNEL_MINOR_VERSION >= 11
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_entry_out *fnieo = NULL;
fuse_private_t *priv = NULL;
dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
inode_t *inode = NULL;
size_t nlen = 0;
fuse_invalidate_node_t *node = NULL;
+ char gfid_str[UUID_CANONICAL_FORM_LEN + 1];
priv = this->private;
-
if (!priv->reverse_fuse_thread_started)
return;
- inode = fuse_ino_to_inode(fuse_ino, this);
- if (inode == NULL) {
+ inode = (inode_t *)(unsigned long)fuse_ino;
+ if (inode == NULL)
return;
- }
- list_for_each_entry(dentry, &inode->dentry_list, inode_list)
+ list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
{
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
@@ -348,38 +348,41 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
fouh->unique = 0;
fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
- nlen = strlen(dentry->name);
- fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1;
- fnieo->parent = inode_to_fuse_nodeid(dentry->parent);
-
- fnieo->namelen = nlen;
- strcpy(node->inval_buf + sizeof(*fouh) + sizeof(*fnieo), dentry->name);
+ if (dentry->name) {
+ nlen = strlen(dentry->name);
+ fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1;
+ fnieo->parent = inode_to_fuse_nodeid(dentry->parent);
- pthread_mutex_lock(&priv->invalidate_mutex);
- {
- list_add_tail(&node->next, &priv->invalidate_list);
- pthread_cond_signal(&priv->invalidate_cond);
+ fnieo->namelen = nlen;
+ strcpy((node->inval_buf + sizeof(*fouh) + sizeof(*fnieo)),
+ dentry->name);
}
- pthread_mutex_unlock(&priv->invalidate_mutex);
gf_log("glusterfs-fuse", GF_LOG_TRACE,
- "INVALIDATE entry: "
- "%" PRIu64 "/%s",
- fnieo->parent, dentry->name);
+ "INVALIDATE entry: %" PRIu64 "/%s (gfid:%s)", fnieo->parent,
+ dentry->name, uuid_utoa(inode->gfid));
if (dentry->parent) {
- fuse_log_eh(this, "Invalidated entry %s (parent: %s)", dentry->name,
- uuid_utoa(dentry->parent->gfid));
+ fuse_log_eh(this, "Invalidated entry %s (parent: %s) gfid:%s",
+ dentry->name, uuid_utoa(dentry->parent->gfid),
+ uuid_utoa_r(inode->gfid, gfid_str));
} else {
- fuse_log_eh(this, "Invalidated entry %s(nodeid: %" PRIu64 ")",
- dentry->name, fnieo->parent);
+ fuse_log_eh(this,
+ "Invalidated entry %s(nodeid: %" PRIu64 ") gfid:%s",
+ dentry->name, fnieo->parent, uuid_utoa(inode->gfid));
+ }
+
+ pthread_mutex_lock(&priv->invalidate_mutex);
+ {
+ list_add_tail(&node->next, &priv->invalidate_list);
+ pthread_cond_signal(&priv->invalidate_cond);
}
+ pthread_mutex_unlock(&priv->invalidate_mutex);
}
- if (inode)
- inode_unref(inode);
-}
#endif
+ return;
+}
/*
* Send an inval inode notification to fuse. This causes an invalidation of the
@@ -400,6 +403,10 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
if (!priv->reverse_fuse_thread_started)
return;
+ inode = (inode_t *)(unsigned long)fuse_ino;
+ if (inode == NULL)
+ return;
+
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
return;
@@ -419,7 +426,11 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
fniio->off = 0;
fniio->len = -1;
- inode = fuse_ino_to_inode(fuse_ino, this);
+ fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino,
+ uuid_utoa(inode->gfid));
+ gf_log("glusterfs-fuse", GF_LOG_TRACE,
+ "INVALIDATE inode: %" PRIu64 "(gfid:%s)", fuse_ino,
+ uuid_utoa(inode->gfid));
pthread_mutex_lock(&priv->invalidate_mutex);
{
@@ -428,24 +439,22 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
}
pthread_mutex_unlock(&priv->invalidate_mutex);
- gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %" PRIu64,
- fuse_ino);
-
- if (inode) {
- fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino,
- uuid_utoa(inode->gfid));
- } else {
- fuse_log_eh(this, "Invalidated inode %" PRIu64, fuse_ino);
- }
-
- if (inode)
- inode_unref(inode);
#else
gf_log("glusterfs-fuse", GF_LOG_WARNING,
- "fuse_invalidate_inode not implemented on OS X due to missing FUSE "
- "notification");
+ "fuse_invalidate_inode not implemented on this system");
#endif
+ return;
+}
+
+#if FUSE_KERNEL_MINOR_VERSION >= 11
+/* Need this function for the signature (inode_t *, instead of uint64_t) */
+static int32_t
+fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
+{
+ fuse_invalidate_entry(this, (uint64_t)inode);
+ return 0;
}
+#endif
static fuse_timed_message_t *
fuse_timed_message_new(void)
@@ -1068,11 +1077,14 @@ do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup)
{
inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this);
+ gf_log("fuse", GF_LOG_TRACE,
+ "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique,
+ nodeid, nlookup, uuid_utoa(fuse_inode->gfid));
+
fuse_log_eh(this, "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)",
unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid));
- inode_forget(fuse_inode, nlookup);
- inode_unref(fuse_inode);
+ inode_forget_with_unref(fuse_inode, nlookup);
}
static void
@@ -1087,10 +1099,6 @@ fuse_forget(xlator_t *this, fuse_in_header_t *finh, void *msg,
return;
}
- gf_log("glusterfs-fuse", GF_LOG_TRACE,
- "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64, finh->unique,
- finh->nodeid, ffi->nlookup);
-
do_forget(this, finh->unique, finh->nodeid, ffi->nlookup);
GF_FREE(finh);
@@ -5658,7 +5666,9 @@ fuse_thread_proc(void *data)
fuse_in_header_t *finh = NULL;
struct iovec iov_in[2];
void *msg = NULL;
- const size_t msg0_size = sizeof(*finh) + 128;
+ /* We need 512 bytes of extra buffer space for the BATCH_FORGET fop. Tests
+ showed that this reduces the number of 'REALLOC()' calls in the loop */
+ const size_t msg0_size = sizeof(*finh) + 512;
fuse_handler_t **fuse_ops = NULL;
struct pollfd pfd[2] = {{
0,
@@ -5992,7 +6002,12 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph)
goto unlock;
}
+#if FUSE_KERNEL_MINOR_VERSION >= 11
+ itable = inode_table_with_invalidator(priv->lru_limit, graph->top,
+ fuse_inode_invalidate_fn, this);
+#else
itable = inode_table_new(0, graph->top);
+#endif
if (!itable) {
ret = -1;
goto unlock;
@@ -6453,6 +6468,8 @@ init(xlator_t *this_xl)
}
}
+ GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
+
GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
@@ -6780,6 +6797,14 @@ struct volume_options options[] = {
.description =
"Handle iterrupts in FLUSH handler (for testing purposes).",
},
+ {
+ .key = {"lru-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "131072",
+ .min = 0,
+ .description = "makes glusterfs invalidate kernel inodes after "
+ "reaching this limit (0 means 'unlimited')",
+ },
{.key = {NULL}},
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index 60702ab1da5..b892113eb79 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -187,6 +187,9 @@ struct fuse_private {
pthread_mutex_t interrupt_mutex;
gf_boolean_t flush_handle_interrupt;
+
+ /* LRU Limit, if not set, default is 128k for now */
+ uint32_t lru_limit;
};
typedef struct fuse_private fuse_private_t;
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 4a95cd80b87..d09a7cd663e 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -249,6 +249,10 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout");
fi
+ if [ -n "$lru_limit" ]; then
+ cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
+ fi
+
if [ -n "$bg_qlen" ]; then
cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
fi
@@ -489,6 +493,9 @@ with_options()
"gid-timeout")
gid_timeout=$value
;;
+ "lru-limit")
+ lru_limit=$value
+ ;;
"background-qlen")
bg_qlen=$value
;;