diff options
Diffstat (limited to 'libglusterfs/src/inode.c')
| -rw-r--r-- | libglusterfs/src/inode.c | 253 | 
1 files changed, 218 insertions, 35 deletions
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 089aa6f9b21..12a8fbd014d 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -23,6 +23,100 @@     move latest accessed dentry to list_head of inode  */ +// clang-format off +/* + +Details as per Xavi: + + I think we should have 3 lists: active, lru and invalidate. + +We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of +refs, invalidate_sent flag and moving from one list to another must be done +atomically. + +With this information, these are the states that cause a transition: + +    refs nlookups inv_sent    op +      1      0        0      unref  -> refs = 0, active--->destroy +      1      1        0      unref  -> refs = 0, active--->lru +      1      1        0     forget  -> nlookups = 0, active--->active +     *0      1        0     forget  -> nlookups = 0, lru--->destroy +     *0      1        1     forget  -> nlookups = 0, invalidate--->destroy +      0      1        0       ref   -> refs = 1, lru--->active +      0      1        1       ref   -> refs = 1, inv_sent = 0, invalidate--->active +      0      1        0    overflow -> refs = 1, inv_sent = 1, lru--->invalidate +      1      1        1      unref  -> refs = 0, invalidate--->invalidate +      1      1        1     forget  -> nlookups = 0, inv_sent = 0, invalidate--->active + +(*) technically these combinations cannot happen because a forget sent by the +kernel first calls ref() and then unref(). However it's equivalent. + +overflow means that lru list has grown beyond the limit and the inode needs to +be invalidated. All other combinations do not cause a change in state or are not +possible. + +Based on this, the code could be similar to this: + +    ref(inode, inv) +    { +        if (refs == 0) { +            if (inv_sent) { +                invalidate_count--; +                inv_sent = 0; +            } else { +                lru_count--; +            } +            if (inv) { +                inv_sent = 1; +                invalidate_count++; +                list_move(inode, invalidate); +            } else { +                active_count++; +                list_move(inode, active); +            } +        } +        refs++; +    } + +    unref(inode, clear) +    { +        if (clear && inv_sent) { +            // there is a case of fuse itself sending forget, without +            // invalidate, after entry delete, like unlink(), rmdir(). +            inv_sent = 0; +            invalidate_count--; +            active_count++; +            list_move(inode, active); +        } +        refs--; +        if ((refs == 0) && !inv_sent) { +            active_count--; +            if (nlookups == 0) { +                destroy(inode); +            } else { +                lru_count++; +                list_move(inode, lru); +            } +        } +    } + +    forget(inode) +    { +        ref(inode, false); +        nlookups--; +        unref(inode, true); +    } + +    overflow(inode) +    { +        ref(inode, true); +        invalidator(inode); +        unref(inode, false); +    } + +*/ +// clang-format on +  #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type)                  \      {                                                                          \          int i = 1;                                                             \ @@ -37,7 +131,7 @@      }  static inode_t * -__inode_unref(inode_t *inode); +__inode_unref(inode_t *inode, bool clear);  static int  inode_table_prune(inode_table_t *table); @@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry)      dentry->name = NULL;      if (dentry->parent) { -        __inode_unref(dentry->parent); +        __inode_unref(dentry->parent, false);          dentry->parent = NULL;      } @@ -446,7 +540,7 @@ out:  }  static inode_t * -__inode_unref(inode_t *inode) +__inode_unref(inode_t *inode, bool clear)  {      int index = 0;      xlator_t *this = NULL; @@ -454,8 +548,6 @@ __inode_unref(inode_t *inode)      if (!inode)          return NULL; -    this = THIS; -      /*       * Root inode should always be in active list of inode table. So unrefs       * on root inode are no-ops. @@ -463,6 +555,13 @@ __inode_unref(inode_t *inode)      if (__is_root_gfid(inode->gfid))          return inode; +    this = THIS; + +    if (clear && inode->invalidate_sent) { +        inode->invalidate_sent = false; +        inode->table->invalidate_size--; +        __inode_activate(inode); +    }      GF_ASSERT(inode->ref);      --inode->ref; @@ -473,7 +572,7 @@ __inode_unref(inode_t *inode)          inode->_ctx[index].ref--;      } -    if (!inode->ref) { +    if (!inode->ref && !inode->invalidate_sent) {          inode->table->active_size--;          if (inode->nlookup) @@ -486,7 +585,7 @@ __inode_unref(inode_t *inode)  }  static inode_t * -__inode_ref(inode_t *inode) +__inode_ref(inode_t *inode, bool is_invalidate)  {      int index = 0;      xlator_t *this = NULL; @@ -496,11 +595,6 @@ __inode_ref(inode_t *inode)      this = THIS; -    if (!inode->ref) { -        inode->table->lru_size--; -        __inode_activate(inode); -    } -      /*       * Root inode should always be in active list of inode table. So unrefs       * on root inode are no-ops. If we do not allow unrefs but allow refs, @@ -512,6 +606,22 @@ __inode_ref(inode_t *inode)      if (__is_root_gfid(inode->gfid) && inode->ref)          return inode; +    if (!inode->ref) { +        if (inode->invalidate_sent) { +            inode->invalidate_sent = false; +            inode->table->invalidate_size--; +        } else { +            inode->table->lru_size--; +        } +        if (is_invalidate) { +            inode->invalidate_sent = true; +            inode->table->invalidate_size++; +            list_move_tail(&inode->list, &inode->table->invalidate); +        } else { +            __inode_activate(inode); +        } +    } +      inode->ref++;      index = __inode_get_xl_index(inode, this); @@ -535,7 +645,7 @@ inode_unref(inode_t *inode)      pthread_mutex_lock(&table->lock);      { -        inode = __inode_unref(inode); +        inode = __inode_unref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -556,7 +666,7 @@ inode_ref(inode_t *inode)      pthread_mutex_lock(&table->lock);      { -        inode = __inode_ref(inode); +        inode = __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -590,7 +700,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)      }      if (parent) -        newd->parent = __inode_ref(parent); +        newd->parent = __inode_ref(parent, false);      list_add(&newd->inode_list, &inode->dentry_list);      newd->inode = inode; @@ -660,7 +770,7 @@ inode_new(inode_table_t *table)      {          inode = __inode_create(table);          if (inode != NULL) { -            __inode_ref(inode); +            __inode_ref(inode, false);          }      }      pthread_mutex_unlock(&table->lock); @@ -773,7 +883,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)              inode = dentry->inode;          if (inode) -            __inode_ref(inode); +            __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -916,7 +1026,7 @@ inode_find(inode_table_t *table, uuid_t gfid)      {          inode = __inode_find(table, gfid);          if (inode) -            __inode_ref(inode); +            __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -1061,7 +1171,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)          linked_inode = __inode_link(inode, parent, name, iatt);          if (linked_inode) -            __inode_ref(linked_inode); +            __inode_ref(linked_inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -1140,6 +1250,31 @@ inode_forget(inode_t *inode, uint64_t nlookup)      return 0;  } +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup) +{ +    inode_table_t *table = NULL; + +    if (!inode) { +        gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +                         "inode not found"); +        return -1; +    } + +    table = inode->table; + +    pthread_mutex_lock(&table->lock); +    { +        __inode_forget(inode, nlookup); +        __inode_unref(inode, true); +    } +    pthread_mutex_unlock(&table->lock); + +    inode_table_prune(table); + +    return 0; +} +  /*   * Invalidate an inode. This is invoked when a translator decides that an   * inode's cache is no longer valid. Any translator interested in taking action @@ -1314,7 +1449,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)              parent = dentry->parent;          if (parent) -            __inode_ref(parent); +            __inode_ref(parent, false);      }      pthread_mutex_unlock(&table->lock); @@ -1496,6 +1631,7 @@ inode_table_prune(inode_table_t *table)      inode_t *del = NULL;      inode_t *tmp = NULL;      inode_t *entry = NULL; +    int64_t lru_size = 0;      if (!table)          return -1; @@ -1504,7 +1640,11 @@ inode_table_prune(inode_table_t *table)      pthread_mutex_lock(&table->lock);      { -        while (table->lru_limit && table->lru_size > (table->lru_limit)) { +        if (!table->lru_limit) +            goto purge_list; + +        lru_size = table->lru_size; +        while (lru_size > (table->lru_limit)) {              if (list_empty(&table->lru)) {                  gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,                                   LG_MSG_INVALID_INODE_LIST, @@ -1514,26 +1654,46 @@ inode_table_prune(inode_table_t *table)                  break;              } +            lru_size--;              entry = list_entry(table->lru.next, inode_t, list); +            /* The logic of invalidation is required only if invalidator_fn +               is present */ +            if (table->invalidator_fn) { +                /* check for valid inode with 'nlookup' */ +                if (entry->nlookup) { +                    __inode_ref(entry, true); +                    tmp = entry; +                    break; +                } +            } +              table->lru_size--;              __inode_retire(entry); -              ret++;          } +    purge_list:          list_splice_init(&table->purge, &purge);          table->purge_size = 0;      }      pthread_mutex_unlock(&table->lock); +    /* Pick 1 inode for invalidation */ +    if (tmp) { +        xlator_t *old_THIS = THIS; +        THIS = table->invalidator_xl; +        table->invalidator_fn(table->invalidator_xl, tmp); +        THIS = old_THIS; +        inode_unref(tmp); +    } + +    /* Just so that if purge list is handled too, then clear it off */ +    list_for_each_entry_safe(del, tmp, &purge, list)      { -        list_for_each_entry_safe(del, tmp, &purge, list) -        { -            list_del_init(&del->list); -            __inode_forget(del, 0); -            __inode_destroy(del); -        } +        list_del_init(&del->list); +        __inode_forget(del, 0); +        __inode_destroy(del);      }      return ret; @@ -1561,9 +1721,12 @@ __inode_table_init_root(inode_table_t *table)  }  inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl) +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, +                             int32_t (*invalidator_fn)(xlator_t *, inode_t *), +                             xlator_t *invalidator_xl)  {      inode_table_t *new = NULL; +    uint32_t mem_pool_size = lru_limit;      int ret = -1;      int i = 0; @@ -1575,20 +1738,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl)      new->ctxcount = xl->graph->xl_count + 1;      new->lru_limit = lru_limit; +    new->invalidator_fn = invalidator_fn; +    new->invalidator_xl = invalidator_xl;      new->hashsize = 14057; /* TODO: Random Number?? */      /* In case FUSE is initing the inode table. */ -    if (lru_limit == 0) -        lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; - -    new->inode_pool = mem_pool_new(inode_t, lru_limit); +    if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) +        mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; +    new->inode_pool = mem_pool_new(inode_t, mem_pool_size);      if (!new->inode_pool)          goto out; -    new->dentry_pool = mem_pool_new(dentry_t, lru_limit); - +    new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);      if (!new->dentry_pool)          goto out; @@ -1620,6 +1783,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl)      INIT_LIST_HEAD(&new->active);      INIT_LIST_HEAD(&new->lru);      INIT_LIST_HEAD(&new->purge); +    INIT_LIST_HEAD(&new->invalidate);      ret = gf_asprintf(&new->name, "%s/inode", xl->name);      if (-1 == ret) { @@ -1649,6 +1813,13 @@ out:      return new;  } +inode_table_t * +inode_table_new(uint32_t lru_limit, xlator_t *xl) +{ +    /* Only fuse for now requires the inode table with invalidator */ +    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); +} +  int  inode_table_ctx_free(inode_table_t *table)  { @@ -1787,6 +1958,14 @@ inode_table_destroy(inode_table_t *inode_table)              inode_table->lru_size--;          } +        /* Same logic for invalidate list */ +        while (!list_empty(&inode_table->invalidate)) { +            trav = list_first_entry(&inode_table->invalidate, inode_t, list); +            __inode_forget(trav, 0); +            __inode_retire(trav); +            inode_table->invalidate_size--; +        } +          while (!list_empty(&inode_table->active)) {              trav = list_first_entry(&inode_table->active, inode_t, list);              /* forget and unref the inode to retire and add it to @@ -2294,6 +2473,7 @@ inode_dump(inode_t *inode, char *prefix)          gf_proc_dump_write("fd-count", "%u", inode->fd_count);          gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);          gf_proc_dump_write("ref", "%u", inode->ref); +        gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);          gf_proc_dump_write("ia_type", "%d", inode->ia_type);          if (inode->_ctx) {              inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx), @@ -2367,10 +2547,13 @@ inode_table_dump(inode_table_t *itable, char *prefix)      gf_proc_dump_write(key, "%d", itable->lru_size);      gf_proc_dump_build_key(key, prefix, "purge_size");      gf_proc_dump_write(key, "%d", itable->purge_size); +    gf_proc_dump_build_key(key, prefix, "invalidate_size"); +    gf_proc_dump_write(key, "%d", itable->invalidate_size);      INODE_DUMP_LIST(&itable->active, key, prefix, "active");      INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");      INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); +    INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");      pthread_mutex_unlock(&itable->lock);  }  | 
