diff options
| author | Raghavendra G <rgowdapp@redhat.com> | 2016-11-24 14:58:20 +0530 | 
|---|---|---|
| committer | Atin Mukherjee <amukherj@redhat.com> | 2016-12-22 03:43:14 -0800 | 
| commit | 96fb35624060565e02e946a970b3e777071bde9c (patch) | |
| tree | a375bccd2b79b0d0c269474a2a318138ca6045aa | |
| parent | 7ee998b9041d594d93a4e2ef369892c185e80def (diff) | |
performance/readdir-ahead: limit cache size
This patch introduces a new option called "rda-cache-limit", which is
the maximum size the entire readdir-ahead cache can grow to. Since
readdir-ahead holds a reference to inodes through dentries, this patch
also accounts for memory stored by various xlators in inode contexts.
Change-Id: I84cc0ca812f35e0f9041f8cc71effae53a9e7f99
BUG: 1356960
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-on: http://review.gluster.org/16137
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Poornima G <pgurusid@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
| -rw-r--r-- | libglusterfs/src/inode.c | 32 | ||||
| -rw-r--r-- | libglusterfs/src/inode.h | 3 | ||||
| -rw-r--r-- | libglusterfs/src/xlator.h | 6 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 27 | ||||
| -rw-r--r-- | xlators/performance/readdir-ahead/src/readdir-ahead.c | 116 | ||||
| -rw-r--r-- | xlators/performance/readdir-ahead/src/readdir-ahead.h | 4 | 
6 files changed, 152 insertions, 36 deletions
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 6e4c2f11bc2..841f0b63f16 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -2504,3 +2504,35 @@ out:          return;  } + +size_t +inode_ctx_size (inode_t *inode) +{ +        int       i    = 0; +        size_t    size = 0; +        xlator_t *xl   = NULL, *old_THIS = NULL; + +        if (!inode) +                goto out; + +        LOCK (&inode->lock); +        { +                for (i = 0; i < inode->table->ctxcount; i++) { +                        if (!inode->_ctx[i].xl_key) +                                continue; + +                        xl = (xlator_t *)(long)inode->_ctx[i].xl_key; +                        old_THIS = THIS; +                        THIS = xl; + +                        if (xl->cbks->ictxsize) +                                size += xl->cbks->ictxsize (xl, inode); + +                        THIS = old_THIS; +                } +        } +        UNLOCK (&inode->lock); + +out: +        return size; +} diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index 114aeae78bb..5289b15bca6 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -279,4 +279,7 @@ inode_needs_lookup (inode_t *inode, xlator_t *this);  int  inode_has_dentry (inode_t *inode); +size_t +inode_ctx_size (inode_t *inode); +  #endif /* _INODE_H */ diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 70e6f0a108d..b11d1a96f32 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -847,6 +847,10 @@ typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client);  typedef void (*cbk_ictxmerge_t) (xlator_t *this, fd_t *fd,                                   inode_t *inode, inode_t *linked_inode); +typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); + +typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); +  struct xlator_cbks {          cbk_forget_t             forget;          cbk_release_t    
        release; @@ -855,6 +859,8 @@ struct xlator_cbks {          cbk_client_t             client_destroy;          cbk_client_t             client_disconnect;          cbk_ictxmerge_t          ictxmerge; +        cbk_inodectx_size_t      ictxsize; +        cbk_fdctx_size_t         fdctxsize;  };  typedef int32_t (*dumpop_priv_t) (xlator_t *this); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 68c57fc10cb..d9d6bc2bf7c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2625,7 +2625,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {                           "option. Unmount and delete the shared storage volume "                           " on disabling this option."          }, -  #if USE_GFDB /* no GFDB means tiering is disabled */          /* tier translator - global tunables */          { .key         = "cluster.write-freq-threshold", @@ -3089,6 +3088,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .op_version = GD_OP_VERSION_3_8_4,            .flags      = OPT_FLAG_CLIENT_OPT          }, +	{ .key         = "performance.rda-request-size", +	  .voltype     = "performance/readdir-ahead", +          .option      = "rda-request-size", +          .flags       = OPT_FLAG_CLIENT_OPT, +          .type        = DOC, +          .op_version  = GD_OP_VERSION_3_9_1, +	}, +	{ .key         = "performance.rda-low-wmark", +          .voltype     = "performance/readdir-ahead", +          .option      = "rda-low-wmark", +          .type        = DOC, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .op_version  = GD_OP_VERSION_3_9_1, +	}, +	{ .key         = "performance.rda-high-wmark", +          .voltype     = "performance/readdir-ahead", +          .type        = DOC, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .op_version  = GD_OP_VERSION_3_9_1, +	}, +        { .key         = 
"performance.rda-cache-limit", +          .voltype     = "performance/readdir-ahead", +          .type        = DOC, +          .flags       = OPT_FLAG_CLIENT_OPT, +          .op_version  = GD_OP_VERSION_3_9_1, +        },          { .key         = NULL          }  }; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c index c3daf916e97..4b57a8b1c3f 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.c +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -97,7 +97,8 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)  {  	if ((ctx->state & RDA_FD_EOD) ||  	    (ctx->state & RDA_FD_ERROR) || -	    (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0))) +	    (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) || +            (request_size && ctx->cur_size >= request_size))  		return _gf_true;  	return _gf_false; @@ -111,20 +112,28 @@ static int32_t  __rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,  		   struct rda_fd_ctx *ctx)  { -	gf_dirent_t *dirent, *tmp; -	size_t dirent_size, size = 0; -	int32_t count = 0; -	struct rda_priv *priv = this->private; +	gf_dirent_t     *dirent, *tmp; +	size_t           dirent_size, size = 0, inodectx_size = 0; +	int32_t          count             = 0; +	struct rda_priv *priv              = NULL; + +        priv = this->private;  	list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) {  		dirent_size = gf_dirent_size(dirent->d_name);  		if (size + dirent_size > request_size)  			break; +                inodectx_size = 0; + +                inode_ctx_del (dirent->inode, this, (void *)&inodectx_size); +  		size += dirent_size;  		list_del_init(&dirent->list);  		ctx->cur_size -= dirent_size; +                priv->rda_cache_size -= (dirent_size + inodectx_size); +  		list_add_tail(&dirent->list, &entries->list);  		ctx->cur_offset = dirent->d_off;  		count++; @@ -234,11 +243,17 @@ 
rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,  	 * the request out of the preload or the request that enables us to do  	 * so is in flight...  	 */ -	if (rda_can_serve_readdirp(ctx, size)) +	if (rda_can_serve_readdirp(ctx, size)) {  		call_resume(stub); -	else +        } else {  		ctx->stub = stub; +                if (!(ctx->state & RDA_FD_RUNNING)) { +                        fill = 1; +                        ctx->state |= RDA_FD_RUNNING; +                } +        } +  	UNLOCK(&ctx->lock);  	if (fill) @@ -266,6 +281,7 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,  	struct rda_fd_ctx *ctx = local->ctx;  	struct rda_priv *priv = this->private;  	int fill = 1; +        size_t inodectx_size = 0, dirent_size = 0;  	LOCK(&ctx->lock); @@ -286,7 +302,19 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,  			/* must preserve entry order */  			list_add_tail(&dirent->list, &ctx->entries.list); -			ctx->cur_size += gf_dirent_size(dirent->d_name); +                        dirent_size = gf_dirent_size (dirent->d_name); +                        inodectx_size = 0; + +                        if (dirent->inode) { +                                inodectx_size = inode_ctx_size (dirent->inode); +                                inode_ctx_set (dirent->inode, this, +                                               (void *)inodectx_size); +                        } + +			ctx->cur_size += dirent_size; + +                        priv->rda_cache_size += (dirent_size + inodectx_size); +  			ctx->next_offset = dirent->d_off;  		}  	} @@ -321,19 +349,21 @@ out:  	 * If we have been marked for bypass and have no pending stub, clear the  	 * run state so we stop preloading the context with entries.  	 
*/ -	if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub) +	if (!ctx->stub && ((ctx->state & RDA_FD_BYPASS) +                           || (priv->rda_cache_size > priv->rda_cache_limit)))  		ctx->state &= ~RDA_FD_RUNNING;  	if (!(ctx->state & RDA_FD_RUNNING)) {  		fill = 0; -        if (ctx->xattrs) { -                /* -                 * fill = 0 and hence rda_fill_fd() won't be invoked. -                 * unref for ref taken in rda_fill_fd() -                 */ -                dict_unref (ctx->xattrs); -                ctx->xattrs = NULL; -        } +                if (ctx->xattrs) { +                        /* +                         * fill = 0 and hence rda_fill_fd() won't be invoked. +                         * unref for ref taken in rda_fill_fd() +                         */ +                        dict_unref (ctx->xattrs); +                        ctx->xattrs = NULL; +                } +  		STACK_DESTROY(ctx->fill_frame->root);  		ctx->fill_frame = NULL;  	} @@ -393,10 +423,10 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)  		ctx->fill_frame = nframe; -        if (!ctx->xattrs && orig_local && orig_local->xattrs) { -                /* when this function is invoked by rda_opendir_cbk */ -                ctx->xattrs = dict_ref(orig_local->xattrs); -        } +                if (!ctx->xattrs && orig_local && orig_local->xattrs) { +                        /* when this function is invoked by rda_opendir_cbk */ +                        ctx->xattrs = dict_ref(orig_local->xattrs); +                }  	} else {  		nframe = ctx->fill_frame;  		local = nframe->local; @@ -578,11 +608,13 @@ reconfigure(xlator_t *this, dict_t *options)  	struct rda_priv *priv = this->private;  	GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options, -			 uint32, err); -	GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64, -			 err); -	GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size_uint64, -			 err); +			 
size_uint64, err); +	GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, +                         size_uint64, err); +	GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, +                         size_uint64, err); +        GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options, +                         size_uint64, err);  	return 0;  err: @@ -619,9 +651,13 @@ init(xlator_t *this)  	if (!this->local_pool)  		goto err; -	GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err); +	GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64, +                       err);  	GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err); -	GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err); +	GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, +                       err); +        GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64, +                       err);  	return 0; @@ -657,26 +693,38 @@ struct xlator_cbks cbks = {  struct volume_options options[] = {  	{ .key = {"rda-request-size"}, -	  .type = GF_OPTION_TYPE_INT, +	  .type = GF_OPTION_TYPE_SIZET,  	  .min = 4096,  	  .max = 131072, -	  .default_value = "131072", -	  .description = "readdir-ahead request size", +	  .default_value = "128KB", +	  .description = "size of buffer in readdirp calls initiated by " +                         "readdir-ahead ",  	},  	{ .key = {"rda-low-wmark"},  	  .type = GF_OPTION_TYPE_SIZET,  	  .min = 0,  	  .max = 10 * GF_UNIT_MB,  	  .default_value = "4096", -	  .description = "the value under which we plug", +	  .description = "the value under which readdir-ahead plugs",  	},  	{ .key = {"rda-high-wmark"},  	  .type = GF_OPTION_TYPE_SIZET,  	  .min = 0,  	  .max = 100 * GF_UNIT_MB, -	  .default_value = "131072", -	  .description = "the value over which we unplug", +	  .default_value = "128KB", +	  .description = "the value over which readdir-ahead unplugs",  	
}, +        { .key = {"rda-cache-limit"}, +          .type = GF_OPTION_TYPE_SIZET, +          .min = 0, +          .max = 1 * GF_UNIT_GB, +          .default_value = "10MB", +          .description = "maximum size of cache consumed by readdir-ahead " +                         "xlator. This value is global and total memory " +                         "consumption by readdir-ahead is capped by this " +                         "value, irrespective of the number/size of " +                         "directories cached", +        },          { .key = {NULL} },  }; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h index f030f10a0af..6b65a6250cc 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h @@ -40,9 +40,11 @@ struct rda_local {  };  struct rda_priv { -	uint32_t rda_req_size; +	uint64_t rda_req_size;  	uint64_t rda_low_wmark;  	uint64_t rda_high_wmark; +        uint64_t rda_cache_limit; +        uint64_t rda_cache_size;  };  #endif /* __READDIR_AHEAD_H */  | 
