summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVijay Bellur <vbellur@redhat.com>2012-12-12 13:42:15 +0530
committerVijay Bellur <vbellur@redhat.com>2012-12-12 03:24:00 -0500
commitae4970422efcbb168072c4db0ee82bbf2d2824f7 (patch)
treed2cf11891f3a05bfd9d62fa4c16267e2d396ce53
parent19a391344daecde4ee6446897c248be5b828b67b (diff)
Rebase commit #2
Change-Id: Ie983d0b9862cc1401187532ed896e57bd3488e2b BUG: 871323 Reviewed-on: https://code.engineering.redhat.com/gerrit/1893 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--glusterfsd/src/glusterfsd.c2
-rw-r--r--libglusterfs/src/gidcache.c177
-rw-r--r--libglusterfs/src/gidcache.h52
-rw-r--r--libglusterfs/src/glusterfs.h1
-rw-r--r--xlators/storage/posix/src/posix-aio.h49
5 files changed, 280 insertions, 1 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 58fa38d..44678bb 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -339,7 +339,7 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
}
if (cmd_args->enable_ino32) {
- ret = dict_set_static_ptr (master->options, "enable-ino32",
+ ret = dict_set_static_ptr (options, "enable-ino32",
"on");
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
new file mode 100644
index 0000000..c55ed25
--- /dev/null
+++ b/libglusterfs/src/gidcache.c
@@ -0,0 +1,177 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gidcache.h"
+#include "mem-pool.h"
+
+/*
+ * We treat this as a very simple set-associative LRU cache, with entries aged
+ * out after a configurable interval. Hardly rocket science, but lots of
+ * details to worry about.
+ */
+#define BUCKET_START(p,n) ((p) + ((n) * AUX_GID_CACHE_ASSOC))
+
+/*
+ * Initialize the cache.
+ */
+int gid_cache_init(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK_INIT(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
+ memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
+
+ return 0;
+}
+
+/*
+ * Look up an ID in the cache. If found, return the actual cache entry to avoid
+ * an additional allocation and memory copy. The caller should copy the data and
+ * release (unlock) the cache as soon as possible.
+ */
+const gid_list_t *gid_cache_lookup(gid_cache_t *cache, uint64_t id)
+{
+ int bucket;
+ int i;
+ time_t now;
+ const gid_list_t *agl;
+
+ LOCK(&cache->gc_lock);
+ now = time(NULL);
+ bucket = id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
+ if (!agl->gl_list)
+ continue;
+ if (agl->gl_id != id)
+ continue;
+
+ /*
+ * We don't put new entries in the cache when expiration=0, but
+ * there might be entries still in there if expiration was
+ * changed very recently. Writing the check this way ensures
+ * that they're not used.
+ */
+ if (now < agl->gl_deadline) {
+ return agl;
+ }
+
+ /*
+ * We're not going to find any more UID matches, and reaping
+ * is handled further down to maintain LRU order.
+ */
+ break;
+ }
+ UNLOCK(&cache->gc_lock);
+ return NULL;
+}
+
+/*
+ * Release an entry found via lookup.
+ */
+void gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
+{
+ UNLOCK(&cache->gc_lock);
+}
+
+/*
+ * Add a new list entry to the cache. If an entry for this ID already exists,
+ * update it.
+ */
+int gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
+{
+ gid_list_t *agl;
+ int bucket;
+ int i;
+ time_t now;
+
+ if (!gl || !gl->gl_list)
+ return -1;
+
+ if (!cache->gc_max_age)
+ return 0;
+
+ LOCK(&cache->gc_lock);
+ now = time(NULL);
+
+ /*
+ * Scan for the first free entry or one that matches this id. The id
+ * check is added to address a bug where the cache might contain an
+ * expired entry for this id. Since lookup occurs in LRU order and
+ * does not reclaim entries, it will always return failure on discovery
+ * of an expired entry. This leads to duplicate entries being added,
+ * which still do not satisfy lookups until the expired entry (and
+ * everything before it) is reclaimed.
+ *
+ * We address this through reuse of an entry already allocated to this
+ * id, whether expired or not, since we have obviously already received
+ * more recent data. The entry is repopulated with the new data and a new
+ * deadline and is pushed forward to reside as the last populated entry in
+ * the bucket.
+ */
+ bucket = gl->gl_id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
+ if (agl->gl_id == gl->gl_id)
+ break;
+ if (!agl->gl_list)
+ break;
+ }
+
+ /*
+ * The way we allocate free entries naturally places the newest
+ * ones at the highest indices, so evicting the lowest makes
+ * sense, but that also means we can't just replace it with the
+ * one that caused the eviction. That would cause us to thrash
+ * the first entry while others remain idle. Therefore, we
+ * need to slide the other entries down and add the new one at
+ * the end just as if the *last* slot had been free.
+ *
+ * Deadline expiration is also handled here, since the oldest
+ * expired entry will be in the first position. This does mean
+ * the bucket can stay full of expired entries if we're idle
+ * but, if the small amount of extra memory or scan time before
+ * we decide to evict someone ever become issues, we could
+ * easily add a reaper thread.
+ */
+
+ if (i >= AUX_GID_CACHE_ASSOC) {
+ /* cache full, evict the first (LRU) entry */
+ i = 0;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ GF_FREE(agl->gl_list);
+ } else if (agl->gl_list) {
+ /* evict the old entry we plan to reuse */
+ GF_FREE(agl->gl_list);
+ }
+
+ /*
+ * If we have evicted an entry, slide the subsequent populated entries
+ * back and populate the last entry.
+ */
+ for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
+ if (!agl[1].gl_list)
+ break;
+ agl[0] = agl[1];
+ agl++;
+ }
+
+ agl->gl_id = gl->gl_id;
+ agl->gl_count = gl->gl_count;
+ agl->gl_list = gl->gl_list;
+ agl->gl_deadline = now + cache->gc_max_age;
+
+ UNLOCK(&cache->gc_lock);
+
+ return 1;
+}
diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/gidcache.h
new file mode 100644
index 0000000..f904f26
--- /dev/null
+++ b/libglusterfs/src/gidcache.h
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GIDCACHE_H__
+#define __GIDCACHE_H__
+
+#include "glusterfs.h"
+#include "locking.h"
+
+/*
+ * TBD: make the cache size tunable
+ *
+ * The current size represents a pretty trivial amount of memory, and should
+ * provide good hit rates even for quite busy systems. If we ever want to
+ * support really large cache sizes, we'll need to do dynamic allocation
+ * instead of just defining an array within a private structure. It doesn't make
+ * a whole lot of sense to change the associativity, because it won't improve
+ * hit rates all that much and will increase the maintenance cost as we have
+ * to scan more entries with every lookup/update.
+ */
+
+#define AUX_GID_CACHE_ASSOC 4
+#define AUX_GID_CACHE_BUCKETS 256
+#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
+
+typedef struct {
+ uint64_t gl_id;
+ int gl_count;
+ gid_t *gl_list;
+ time_t gl_deadline;
+} gid_list_t;
+
+typedef struct {
+ gf_lock_t gc_lock;
+ uint32_t gc_max_age;
+ unsigned int gc_nbuckets;
+ gid_list_t gc_cache[AUX_GID_CACHE_SIZE];
+} gid_cache_t;
+
+int gid_cache_init(gid_cache_t *, uint32_t);
+const gid_list_t *gid_cache_lookup(gid_cache_t *, uint64_t);
+void gid_cache_release(gid_cache_t *, const gid_list_t *);
+int gid_cache_add(gid_cache_t *, gid_list_t *);
+
+#endif /* __GIDCACHE_H__ */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index d331a5e..8532b31 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -300,6 +300,7 @@ struct _cmd_args {
int enable_ino32;
int worm;
int mac_compat;
+ int gid_timeout;
struct list_head xlator_options; /* list of xlator_option_t */
/* fuse options */
diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h
new file mode 100644
index 0000000..545130a
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _POSIX_AIO_H
+#define _POSIX_AIO_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+// Maximum number of concurrently submitted IO events. The heaviest load
+// GlusterFS has been able to handle had 60-80 concurrent calls
+#define POSIX_AIO_MAX_NR_EVENTS 256
+
+// Maximum number of completed IO operations to reap per getevents syscall
+#define POSIX_AIO_MAX_NR_GETEVENTS 16
+
+
+int posix_aio_on (xlator_t *this);
+int posix_aio_off (xlator_t *this);
+
+int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+#endif /* !_POSIX_AIO_H */