summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'libglusterfs/src/inode.c')
-rw-r--r--libglusterfs/src/inode.c253
1 files changed, 218 insertions, 35 deletions
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 089aa6f9b21..12a8fbd014d 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -23,6 +23,100 @@
move latest accessed dentry to list_head of inode
*/
+// clang-format off
+/*
+
+Details as per Xavi:
+
+ I think we should have 3 lists: active, lru and invalidate.
+
+We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of
+refs, invalidate_sent flag and moving from one list to another must be done
+atomically.
+
+With this information, these are the states that cause a transition:
+
+ refs nlookups inv_sent op
+ 1 0 0 unref -> refs = 0, active--->destroy
+ 1 1 0 unref -> refs = 0, active--->lru
+ 1 1 0 forget -> nlookups = 0, active--->active
+ *0 1 0 forget -> nlookups = 0, lru--->destroy
+ *0 1 1 forget -> nlookups = 0, invalidate--->destroy
+ 0 1 0 ref -> refs = 1, lru--->active
+ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active
+ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate
+ 1 1 1 unref -> refs = 0, invalidate--->invalidate
+ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active
+
+(*) technically these combinations cannot happen because a forget sent by the
+kernel first calls ref() and then unref(). However it's equivalent.
+
+overflow means that lru list has grown beyond the limit and the inode needs to
+be invalidated. All other combinations do not cause a change in state or are not
+possible.
+
+Based on this, the code could be similar to this:
+
+ ref(inode, inv)
+ {
+ if (refs == 0) {
+ if (inv_sent) {
+ invalidate_count--;
+ inv_sent = 0;
+ } else {
+ lru_count--;
+ }
+ if (inv) {
+ inv_sent = 1;
+ invalidate_count++;
+ list_move(inode, invalidate);
+ } else {
+ active_count++;
+ list_move(inode, active);
+ }
+ }
+ refs++;
+ }
+
+ unref(inode, clear)
+ {
+ if (clear && inv_sent) {
+ // there is a case of fuse itself sending forget, without
+ // invalidate, after entry delete, like unlink(), rmdir().
+ inv_sent = 0;
+ invalidate_count--;
+ active_count++;
+ list_move(inode, active);
+ }
+ refs--;
+ if ((refs == 0) && !inv_sent) {
+ active_count--;
+ if (nlookups == 0) {
+ destroy(inode);
+ } else {
+ lru_count++;
+ list_move(inode, lru);
+ }
+ }
+ }
+
+ forget(inode)
+ {
+ ref(inode, false);
+ nlookups--;
+ unref(inode, true);
+ }
+
+ overflow(inode)
+ {
+ ref(inode, true);
+ invalidator(inode);
+ unref(inode, false);
+ }
+
+*/
+// clang-format on
+
#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
{ \
int i = 1; \
@@ -37,7 +131,7 @@
}
static inode_t *
-__inode_unref(inode_t *inode);
+__inode_unref(inode_t *inode, bool clear);
static int
inode_table_prune(inode_table_t *table);
@@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry)
dentry->name = NULL;
if (dentry->parent) {
- __inode_unref(dentry->parent);
+ __inode_unref(dentry->parent, false);
dentry->parent = NULL;
}
@@ -446,7 +540,7 @@ out:
}
static inode_t *
-__inode_unref(inode_t *inode)
+__inode_unref(inode_t *inode, bool clear)
{
int index = 0;
xlator_t *this = NULL;
@@ -454,8 +548,6 @@ __inode_unref(inode_t *inode)
if (!inode)
return NULL;
- this = THIS;
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops.
@@ -463,6 +555,13 @@ __inode_unref(inode_t *inode)
if (__is_root_gfid(inode->gfid))
return inode;
+ this = THIS;
+
+ if (clear && inode->invalidate_sent) {
+ inode->invalidate_sent = false;
+ inode->table->invalidate_size--;
+ __inode_activate(inode);
+ }
GF_ASSERT(inode->ref);
--inode->ref;
@@ -473,7 +572,7 @@ __inode_unref(inode_t *inode)
inode->_ctx[index].ref--;
}
- if (!inode->ref) {
+ if (!inode->ref && !inode->invalidate_sent) {
inode->table->active_size--;
if (inode->nlookup)
@@ -486,7 +585,7 @@ __inode_unref(inode_t *inode)
}
static inode_t *
-__inode_ref(inode_t *inode)
+__inode_ref(inode_t *inode, bool is_invalidate)
{
int index = 0;
xlator_t *this = NULL;
@@ -496,11 +595,6 @@ __inode_ref(inode_t *inode)
this = THIS;
- if (!inode->ref) {
- inode->table->lru_size--;
- __inode_activate(inode);
- }
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops. If we do not allow unrefs but allow refs,
@@ -512,6 +606,22 @@ __inode_ref(inode_t *inode)
if (__is_root_gfid(inode->gfid) && inode->ref)
return inode;
+ if (!inode->ref) {
+ if (inode->invalidate_sent) {
+ inode->invalidate_sent = false;
+ inode->table->invalidate_size--;
+ } else {
+ inode->table->lru_size--;
+ }
+ if (is_invalidate) {
+ inode->invalidate_sent = true;
+ inode->table->invalidate_size++;
+ list_move_tail(&inode->list, &inode->table->invalidate);
+ } else {
+ __inode_activate(inode);
+ }
+ }
+
inode->ref++;
index = __inode_get_xl_index(inode, this);
@@ -535,7 +645,7 @@ inode_unref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_unref(inode);
+ inode = __inode_unref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -556,7 +666,7 @@ inode_ref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_ref(inode);
+ inode = __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -590,7 +700,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)
}
if (parent)
- newd->parent = __inode_ref(parent);
+ newd->parent = __inode_ref(parent, false);
list_add(&newd->inode_list, &inode->dentry_list);
newd->inode = inode;
@@ -660,7 +770,7 @@ inode_new(inode_table_t *table)
{
inode = __inode_create(table);
if (inode != NULL) {
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
}
pthread_mutex_unlock(&table->lock);
@@ -773,7 +883,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
inode = dentry->inode;
if (inode)
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -916,7 +1026,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
{
inode = __inode_find(table, gfid);
if (inode)
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1061,7 +1171,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
linked_inode = __inode_link(inode, parent, name, iatt);
if (linked_inode)
- __inode_ref(linked_inode);
+ __inode_ref(linked_inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1140,6 +1250,31 @@ inode_forget(inode_t *inode, uint64_t nlookup)
return 0;
}
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup)
+{
+ inode_table_t *table = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
+
+ table = inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ __inode_forget(inode, nlookup);
+ __inode_unref(inode, true);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table);
+
+ return 0;
+}
+
/*
* Invalidate an inode. This is invoked when a translator decides that an
* inode's cache is no longer valid. Any translator interested in taking action
@@ -1314,7 +1449,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
parent = dentry->parent;
if (parent)
- __inode_ref(parent);
+ __inode_ref(parent, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1496,6 +1631,7 @@ inode_table_prune(inode_table_t *table)
inode_t *del = NULL;
inode_t *tmp = NULL;
inode_t *entry = NULL;
+ int64_t lru_size = 0;
if (!table)
return -1;
@@ -1504,7 +1640,11 @@ inode_table_prune(inode_table_t *table)
pthread_mutex_lock(&table->lock);
{
- while (table->lru_limit && table->lru_size > (table->lru_limit)) {
+ if (!table->lru_limit)
+ goto purge_list;
+
+ lru_size = table->lru_size;
+ while (lru_size > (table->lru_limit)) {
if (list_empty(&table->lru)) {
gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
LG_MSG_INVALID_INODE_LIST,
@@ -1514,26 +1654,46 @@ inode_table_prune(inode_table_t *table)
break;
}
+ lru_size--;
entry = list_entry(table->lru.next, inode_t, list);
+ /* The logic of invalidation is required only if invalidator_fn
+ is present */
+ if (table->invalidator_fn) {
+ /* check for valid inode with 'nlookup' */
+ if (entry->nlookup) {
+ __inode_ref(entry, true);
+ tmp = entry;
+ break;
+ }
+ }
+
table->lru_size--;
__inode_retire(entry);
-
ret++;
}
+ purge_list:
list_splice_init(&table->purge, &purge);
table->purge_size = 0;
}
pthread_mutex_unlock(&table->lock);
+ /* Pick 1 inode for invalidation */
+ if (tmp) {
+ xlator_t *old_THIS = THIS;
+ THIS = table->invalidator_xl;
+ table->invalidator_fn(table->invalidator_xl, tmp);
+ THIS = old_THIS;
+ inode_unref(tmp);
+ }
+
+ /* Just so that if purge list is handled too, then clear it off */
+ list_for_each_entry_safe(del, tmp, &purge, list)
{
- list_for_each_entry_safe(del, tmp, &purge, list)
- {
- list_del_init(&del->list);
- __inode_forget(del, 0);
- __inode_destroy(del);
- }
+ list_del_init(&del->list);
+ __inode_forget(del, 0);
+ __inode_destroy(del);
}
return ret;
@@ -1561,9 +1721,12 @@ __inode_table_init_root(inode_table_t *table)
}
inode_table_t *
-inode_table_new(size_t lru_limit, xlator_t *xl)
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl)
{
inode_table_t *new = NULL;
+ uint32_t mem_pool_size = lru_limit;
int ret = -1;
int i = 0;
@@ -1575,20 +1738,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
new->ctxcount = xl->graph->xl_count + 1;
new->lru_limit = lru_limit;
+ new->invalidator_fn = invalidator_fn;
+ new->invalidator_xl = invalidator_xl;
new->hashsize = 14057; /* TODO: Random Number?? */
/* In case FUSE is initing the inode table. */
- if (lru_limit == 0)
- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
-
- new->inode_pool = mem_pool_new(inode_t, lru_limit);
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
+ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES;
+ new->inode_pool = mem_pool_new(inode_t, mem_pool_size);
if (!new->inode_pool)
goto out;
- new->dentry_pool = mem_pool_new(dentry_t, lru_limit);
-
+ new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);
if (!new->dentry_pool)
goto out;
@@ -1620,6 +1783,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
INIT_LIST_HEAD(&new->active);
INIT_LIST_HEAD(&new->lru);
INIT_LIST_HEAD(&new->purge);
+ INIT_LIST_HEAD(&new->invalidate);
ret = gf_asprintf(&new->name, "%s/inode", xl->name);
if (-1 == ret) {
@@ -1649,6 +1813,13 @@ out:
return new;
}
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl)
+{
+ /* Only fuse for now requires the inode table with invalidator */
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+}
+
int
inode_table_ctx_free(inode_table_t *table)
{
@@ -1787,6 +1958,14 @@ inode_table_destroy(inode_table_t *inode_table)
inode_table->lru_size--;
}
+ /* Same logic for invalidate list */
+ while (!list_empty(&inode_table->invalidate)) {
+ trav = list_first_entry(&inode_table->invalidate, inode_t, list);
+ __inode_forget(trav, 0);
+ __inode_retire(trav);
+ inode_table->invalidate_size--;
+ }
+
while (!list_empty(&inode_table->active)) {
trav = list_first_entry(&inode_table->active, inode_t, list);
/* forget and unref the inode to retire and add it to
@@ -2294,6 +2473,7 @@ inode_dump(inode_t *inode, char *prefix)
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);
gf_proc_dump_write("ref", "%u", inode->ref);
+ gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
if (inode->_ctx) {
inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx),
@@ -2367,10 +2547,13 @@ inode_table_dump(inode_table_t *itable, char *prefix)
gf_proc_dump_write(key, "%d", itable->lru_size);
gf_proc_dump_build_key(key, prefix, "purge_size");
gf_proc_dump_write(key, "%d", itable->purge_size);
+ gf_proc_dump_build_key(key, prefix, "invalidate_size");
+ gf_proc_dump_write(key, "%d", itable->invalidate_size);
INODE_DUMP_LIST(&itable->active, key, prefix, "active");
INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");
pthread_mutex_unlock(&itable->lock);
}