-rw-r--r--  glusterfsd/src/glusterfsd.c              2
-rw-r--r--  libglusterfs/src/gidcache.c            177
-rw-r--r--  libglusterfs/src/gidcache.h             52
-rw-r--r--  libglusterfs/src/glusterfs.h             1
-rw-r--r--  xlators/storage/posix/src/posix-aio.h   49
5 files changed, 280 insertions(+), 1 deletion(-)
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 58fa38d0b3b..44678bb1315 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -339,7 +339,7 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
         }
 
         if (cmd_args->enable_ino32) {
-                ret = dict_set_static_ptr (master->options, "enable-ino32",
+                ret = dict_set_static_ptr (options, "enable-ino32",
                                            "on");
                 if (ret < 0) {
                         gf_log ("glusterfsd", GF_LOG_ERROR,
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
new file mode 100644
index 00000000000..c55ed2581ba
--- /dev/null
+++ b/libglusterfs/src/gidcache.c
@@ -0,0 +1,177 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gidcache.h"
+#include "mem-pool.h"
+
+/*
+ * We treat this as a very simple set-associative LRU cache, with entries aged
+ * out after a configurable interval. Hardly rocket science, but lots of
+ * details to worry about.
+ */
+#define BUCKET_START(p,n) ((p) + ((n) * AUX_GID_CACHE_ASSOC))
+
+/*
+ * Initialize the cache.
+ */
+int gid_cache_init(gid_cache_t *cache, uint32_t timeout)
+{
+        if (!cache)
+                return -1;
+
+        LOCK_INIT(&cache->gc_lock);
+        cache->gc_max_age = timeout;
+        cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
+        memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
+
+        return 0;
+}
+
+/*
+ * Look up an ID in the cache. If found, return the actual cache entry to avoid
+ * an additional allocation and memory copy. The caller should copy the data and
+ * release (unlock) the cache as soon as possible.
+ */
+const gid_list_t *gid_cache_lookup(gid_cache_t *cache, uint64_t id)
+{
+        int bucket;
+        int i;
+        time_t now;
+        const gid_list_t *agl;
+
+        LOCK(&cache->gc_lock);
+        now = time(NULL);
+        bucket = id % cache->gc_nbuckets;
+        agl = BUCKET_START(cache->gc_cache, bucket);
+        for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
+                if (!agl->gl_list)
+                        continue;
+                if (agl->gl_id != id)
+                        continue;
+
+                /*
+                 * We don't put new entries in the cache when expiration=0, but
+                 * there might be entries still in there if expiration was
+                 * changed very recently. Writing the check this way ensures
+                 * that they're not used.
+                 */
+                if (now < agl->gl_deadline) {
+                        return agl;
+                }
+
+                /*
+                 * We're not going to find any more UID matches, and reaping
+                 * is handled further down to maintain LRU order.
+                 */
+                break;
+        }
+        UNLOCK(&cache->gc_lock);
+        return NULL;
+}
+
+/*
+ * Release an entry found via lookup.
+ */
+void gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
+{
+        UNLOCK(&cache->gc_lock);
+}
+
+/*
+ * Add a new list entry to the cache. If an entry for this ID already exists,
+ * update it.
+ */
+int gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
+{
+        gid_list_t *agl;
+        int bucket;
+        int i;
+        time_t now;
+
+        if (!gl || !gl->gl_list)
+                return -1;
+
+        if (!cache->gc_max_age)
+                return 0;
+
+        LOCK(&cache->gc_lock);
+        now = time(NULL);
+
+        /*
+         * Scan for the first free entry or one that matches this id. The id
+         * check is added to address a bug where the cache might contain an
+         * expired entry for this id. Since lookup occurs in LRU order and
+         * does not reclaim entries, it will always return failure on discovery
+         * of an expired entry. This leads to duplicate entries being added,
+         * which still do not satisfy lookups until the expired entry (and
+         * everything before it) is reclaimed.
+         *
+         * We address this through reuse of an entry already allocated to this
+         * id, whether expired or not, since we have obviously already received
+         * more recent data. The entry is repopulated with the new data and a new
+         * deadline and is pushed forward to reside as the last populated entry in
+         * the bucket.
+         */
+        bucket = gl->gl_id % cache->gc_nbuckets;
+        agl = BUCKET_START(cache->gc_cache, bucket);
+        for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
+                if (agl->gl_id == gl->gl_id)
+                        break;
+                if (!agl->gl_list)
+                        break;
+        }
+
+        /*
+         * The way we allocate free entries naturally places the newest
+         * ones at the highest indices, so evicting the lowest makes
+         * sense, but that also means we can't just replace it with the
+         * one that caused the eviction. That would cause us to thrash
+         * the first entry while others remain idle. Therefore, we
+         * need to slide the other entries down and add the new one at
+         * the end just as if the *last* slot had been free.
+         *
+         * Deadline expiration is also handled here, since the oldest
+         * expired entry will be in the first position. This does mean
+         * the bucket can stay full of expired entries if we're idle
+         * but, if the small amount of extra memory or scan time before
+         * we decide to evict someone ever becomes an issue, we could
+         * easily add a reaper thread.
+         */
+
+        if (i >= AUX_GID_CACHE_ASSOC) {
+                /* cache full, evict the first (LRU) entry */
+                i = 0;
+                agl = BUCKET_START(cache->gc_cache, bucket);
+                GF_FREE(agl->gl_list);
+        } else if (agl->gl_list) {
+                /* evict the old entry we plan to reuse */
+                GF_FREE(agl->gl_list);
+        }
+
+        /*
+         * If we have evicted an entry, slide the subsequent populated entries
+         * back and populate the last entry.
+         */
+        for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
+                if (!agl[1].gl_list)
+                        break;
+                agl[0] = agl[1];
+                agl++;
+        }
+
+        agl->gl_id = gl->gl_id;
+        agl->gl_count = gl->gl_count;
+        agl->gl_list = gl->gl_list;
+        agl->gl_deadline = now + cache->gc_max_age;
+
+        UNLOCK(&cache->gc_lock);
+
+        return 1;
+}
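
A minimal usage sketch of the API added above (not part of the patch): gid_cache_lookup() returns a pointer into the still-locked cache, so callers must copy the data out and call gid_cache_release() promptly; gid_cache_add() takes ownership of gl_list when it returns 1 (evicted lists are freed with GF_FREE, so the list should come from GF_MALLOC/GF_CALLOC). The resolver helper resolve_groups() is hypothetical, standing in for whatever actually fetches a user's auxiliary groups (e.g. getgrouplist()).

#include <string.h>
#include "gidcache.h"
#include "mem-pool.h"

/* Hypothetical helper: fills *list (GF_MALLOC'd) and *count for uid. */
extern int resolve_groups(uint64_t uid, gid_t **list, int *count);

int get_cached_groups(gid_cache_t *cache, uint64_t uid,
                      gid_t *out, int max_out)
{
        const gid_list_t *agl;
        gid_list_t gl;
        int n;

        agl = gid_cache_lookup(cache, uid);
        if (agl) {
                /* Hit: copy while the cache lock is held, then drop it. */
                n = (agl->gl_count < max_out) ? agl->gl_count : max_out;
                memcpy(out, agl->gl_list, n * sizeof(gid_t));
                gid_cache_release(cache, agl);
                return n;
        }

        /* Miss: resolve, copy for the caller, then publish to the cache. */
        if (resolve_groups(uid, &gl.gl_list, &gl.gl_count) != 0)
                return -1;
        gl.gl_id = uid;

        n = (gl.gl_count < max_out) ? gl.gl_count : max_out;
        memcpy(out, gl.gl_list, n * sizeof(gid_t));

        if (gid_cache_add(cache, &gl) != 1)
                GF_FREE(gl.gl_list); /* not stored; still ours to free */
        return n;
}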
diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/gidcache.h
new file mode 100644
index 00000000000..f904f26eb97
--- /dev/null
+++ b/libglusterfs/src/gidcache.h
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GIDCACHE_H__
+#define __GIDCACHE_H__
+
+#include "glusterfs.h"
+#include "locking.h"
+
+/*
+ * TBD: make the cache size tunable
+ *
+ * The current size represents a pretty trivial amount of memory, and should
+ * provide good hit rates even for quite busy systems. If we ever want to
+ * support really large cache sizes, we'll need to do dynamic allocation
+ * instead of just defining an array within a private structure. It doesn't make
+ * a whole lot of sense to change the associativity, because it won't improve
+ * hit rates all that much and will increase the maintenance cost as we have
+ * to scan more entries with every lookup/update.
+ */
+
+#define AUX_GID_CACHE_ASSOC 4
+#define AUX_GID_CACHE_BUCKETS 256
+#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
+
+typedef struct {
+        uint64_t gl_id;
+        int gl_count;
+        gid_t *gl_list;
+        time_t gl_deadline;
+} gid_list_t;
+
+typedef struct {
+        gf_lock_t gc_lock;
+        uint32_t gc_max_age;
+        unsigned int gc_nbuckets;
+        gid_list_t gc_cache[AUX_GID_CACHE_SIZE];
+} gid_cache_t;
+
+int gid_cache_init(gid_cache_t *, uint32_t);
+const gid_list_t *gid_cache_lookup(gid_cache_t *, uint64_t);
+void gid_cache_release(gid_cache_t *, const gid_list_t *);
+int gid_cache_add(gid_cache_t *, gid_list_t *);
+
+#endif /* __GIDCACHE_H__ */
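
To make the sizing comment above concrete (a back-of-the-envelope estimate, assuming a typical LP64 platform where gid_list_t packs to 32 bytes): the table holds 4 * 256 = 1024 entries, roughly 32KB per cache, and any lookup or insert scans at most the four slots of one bucket. The snippet below is illustrative only:

#include <stdio.h>
#include "gidcache.h"

int main(void)
{
        uint64_t uid = 1000;
        unsigned int bucket = uid % AUX_GID_CACHE_BUCKETS;  /* 1000 % 256 = 232 */
        unsigned int slot0  = bucket * AUX_GID_CACHE_ASSOC; /* entries 928..931 */

        printf("%d entries, %zu bytes; uid %llu -> bucket %u (slots %u..%u)\n",
               AUX_GID_CACHE_SIZE, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE,
               (unsigned long long)uid, bucket, slot0,
               slot0 + AUX_GID_CACHE_ASSOC - 1);
        return 0;
}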
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index d331a5ee640..8532b318235 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -300,6 +300,7 @@ struct _cmd_args {
         int enable_ino32;
         int worm;
         int mac_compat;
+        int gid_timeout;
         struct list_head xlator_options; /* list of xlator_option_t */
 
         /* fuse options */
diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h
new file mode 100644
index 00000000000..545130a2889
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _POSIX_AIO_H
+#define _POSIX_AIO_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+// Maximum number of concurrently submitted IO events. The heaviest load
+// GlusterFS has been observed to handle involved 60-80 concurrent calls.
+#define POSIX_AIO_MAX_NR_EVENTS 256
+
+// Maximum number of completed IO operations to reap per getevents syscall
+#define POSIX_AIO_MAX_NR_GETEVENTS 16
+
+
+int posix_aio_on (xlator_t *this);
+int posix_aio_off (xlator_t *this);
+
+int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                 off_t offset, uint32_t flags, dict_t *xdata);
+
+int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  struct iovec *vector, int32_t count, off_t offset,
+                  uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+#endif /* !_POSIX_AIO_H */
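
Only the header is part of this patch, so the following is a hedged sketch (an assumption, not the actual posix-aio.c implementation) of how the two limits above would typically bound a Linux-native AIO setup via libaio; aio_init_sketch() and aio_reap_sketch() are invented names, and the program must be linked with -laio.

#include <libaio.h>
#include <string.h>
#include "posix-aio.h"

static io_context_t aio_ctx;

int aio_init_sketch (void)
{
        memset (&aio_ctx, 0, sizeof (aio_ctx));
        /* Size the kernel completion queue to the maximum number of
         * requests we will ever have in flight at once. */
        return io_setup (POSIX_AIO_MAX_NR_EVENTS, &aio_ctx);
}

int aio_reap_sketch (void)
{
        struct io_event events[POSIX_AIO_MAX_NR_GETEVENTS];

        /* Wait for at least one completion, but reap at most 16 per
         * call so a single pass cannot monopolize the reaper. */
        return io_getevents (aio_ctx, 1, POSIX_AIO_MAX_NR_GETEVENTS,
                             events, NULL);
}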