summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Wareing <rwareing@fb.com>2015-06-23 17:03:11 -0700
committerVijay Bellur <vbellur@redhat.com>2015-11-01 09:14:34 -0800
commitd3e496cbcd35b9d9b840e328ae109c44f59083ce (patch)
tree25eedba238f9168cd11015885973d5a960c1596e
parent846697d91ed56ca3d76f2a78e87e7675f127f21d (diff)
debug/io-stats: Add FOP sampling feature
Summary: - Using sampling feature you can record details about every Nth FOP. The fields in each sample are: FOP type, hostname, uid, gid, FOP priority, port and time taken (latency) to fufill the request. - Implemented using a ring buffer which is not (m/c) allocated in the IO path, this should make the sampling process pretty cheap. - DNS resolution done @ dump time not @ sample time for performance w/ cache - Metrics can be used for both diagnostics, traffic/IO profiling as well as P95/P99 calculations - To control this feature there are two new volume options: diagnostics.fop-sample-interval - The sampling interval, e.g. 1 means sample every FOP, 100 means sample every 100th FOP diagnostics.fop-sample-buf-size - The size (in bytes) of the ring buffer used to store the samples. In the even more samples are collected in the stats dump interval than can be held in this buffer, the oldest samples shall be discarded. Samples are stored in the log directory under /var/log/glusterfs/samples. - Uses DNS cache written by sshreyas@fb.com (Thank-you!), the DNS cache TTL is controlled by the diagnostics.stats-dnscache-ttl-sec option and defaults to 24hrs. Test Plan: - Valgrind'd to ensure it's leak free - Run prove test(s) - Shadow testing on 100+ brick cluster Change-Id: I9ee14c2fa18486b7efb38e59f70687249d3f96d8 BUG: 1271310 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/12210 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--extras/glusterfs-logrotate9
-rw-r--r--libglusterfs/src/common-utils.c260
-rw-r--r--libglusterfs/src/common-utils.h21
-rw-r--r--libglusterfs/src/glusterfs.h6
-rw-r--r--libglusterfs/src/mem-types.h2
-rw-r--r--libglusterfs/src/stack.h1
-rw-r--r--tests/basic/fop-sampling.t61
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h2
-rw-r--r--xlators/debug/io-stats/src/io-stats.c494
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c15
-rw-r--r--xlators/nfs/server/src/nfs-fops.c1
-rw-r--r--xlators/nfs/server/src/nfs.c15
-rw-r--r--xlators/nfs/server/src/nfs.h5
-rw-r--r--xlators/protocol/server/src/server-helpers.c7
14 files changed, 864 insertions, 35 deletions
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate
index e3319afaa96..575c0eee771 100644
--- a/extras/glusterfs-logrotate
+++ b/extras/glusterfs-logrotate
@@ -26,3 +26,12 @@
/usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true
endscript
}
+
+/var/log/glusterfs/samples/*.samp {
+ daily
+ rotate 3
+ sharedscripts
+ missingok
+ compress
+ delaycompress
+}
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 2dcd54f1829..a89e120c0fb 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -34,6 +34,7 @@
#if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS)
#include <sys/sysctl.h>
#endif
+#include <libgen.h>
#include "compat-errno.h"
#include "logging.h"
@@ -210,7 +211,7 @@ out:
}
/**
- * gf_resolve_parent_path -- Given a path, returns an allocated string
+ * gf_resolve_path_parent -- Given a path, returns an allocated string
* containing the parent's path.
* @path: Path to parse
* @return: The parent path if found, NULL otherwise
@@ -359,6 +360,135 @@ err:
return -1;
}
+/**
+ * gf_dnscache_init -- Initializes a dnscache struct and sets the ttl
+ * to the specified value in the parameter.
+ *
+ * @ttl: the TTL in seconds
+ * @return: SUCCESS: Pointer to an allocated dnscache struct
+ * FAILURE: NULL
+ */
+struct dnscache *
+gf_dnscache_init (time_t ttl)
+{
+ struct dnscache *cache = GF_MALLOC (sizeof (*cache),
+ gf_common_mt_dnscache);
+ cache->cache_dict = NULL;
+ cache->ttl = ttl;
+ return cache;
+}
+
+/**
+ * gf_dnscache_entry_init -- Initialize a dnscache entry
+ *
+ * @return: SUCCESS: Pointer to an allocated dnscache entry struct
+ * FAILURE: NULL
+ */
+struct dnscache_entry *
+gf_dnscache_entry_init ()
+{
+ struct dnscache_entry *entry = GF_CALLOC (1, sizeof (*entry),
+ gf_common_mt_dnscache_entry);
+ return entry;
+}
+
+/**
+ * gf_dnscache_entry_deinit -- Free memory used by a dnscache entry
+ *
+ * @entry: Pointer to deallocate
+ */
+void
+gf_dnscache_entry_deinit (struct dnscache_entry *entry)
+{
+ GF_FREE (entry->ip);
+ GF_FREE (entry->fqdn);
+ GF_FREE (entry);
+}
+
+/**
+ * gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address.
+ *
+ * @ip: The IP address to perform a reverse lookup on
+ *
+ * @return: success: Allocated string containing the hostname
+ * failure: NULL
+ */
+char *
+gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache)
+{
+ char *fqdn = NULL;
+ int ret = 0;
+ dict_t *cache = NULL;
+ data_t *entrydata = NULL;
+ struct dnscache_entry *dnsentry = NULL;
+ gf_boolean_t from_cache = _gf_false;
+
+ if (!dnscache)
+ goto out;
+
+ if (!dnscache->cache_dict) {
+ dnscache->cache_dict = dict_new ();
+ if (!dnscache->cache_dict) {
+ goto out;
+ }
+ }
+ cache = dnscache->cache_dict;
+
+ /* Quick cache lookup to see if we already hold it */
+ entrydata = dict_get (cache, (char *)ip);
+ if (entrydata) {
+ dnsentry = (struct dnscache_entry *)entrydata->data;
+ /* First check the TTL & timestamp */
+ if (time (NULL) - dnsentry->timestamp > dnscache->ttl) {
+ gf_dnscache_entry_deinit (dnsentry);
+ entrydata->data = NULL; /* Mark this as 'null' so
+ * dict_del () doesn't try free
+ * this after we've already
+ * freed it.
+ */
+
+ dict_del (cache, (char *)ip); /* Remove this entry */
+ } else {
+ /* Cache entry is valid, get the FQDN and return */
+ fqdn = dnsentry->fqdn;
+ from_cache = _gf_true; /* Mark this as from cache */
+ goto out;
+ }
+ }
+
+ /* Get the FQDN */
+ ret = gf_get_hostname_from_ip ((char *)ip, &fqdn);
+ if (ret != 0)
+ goto out;
+
+ if (!fqdn) {
+ gf_log_callingfn ("resolver", GF_LOG_CRITICAL,
+ "Allocation failed for the host address");
+ goto out;
+ }
+
+ from_cache = _gf_false;
+out:
+ /* Insert into the cache */
+ if (fqdn && !from_cache) {
+ struct dnscache_entry *entry = gf_dnscache_entry_init ();
+
+ if (!entry) {
+ goto out;
+ }
+ entry->fqdn = fqdn;
+ entry->ip = gf_strdup (ip);
+ if (!ip) {
+ gf_dnscache_entry_deinit (entry);
+ goto out;
+ }
+ entry->timestamp = time (NULL);
+
+ entrydata = bin_to_data (entry, sizeof (*entry));
+ dict_set (cache, (char *)ip, entrydata);
+ }
+ return fqdn;
+}
struct xldump {
int lineno;
@@ -4012,3 +4142,131 @@ _unmask_cancellation (void)
{
(void) pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
}
+
+
+const char *
+fop_enum_to_pri_string (glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_OPEN:
+ case GF_FOP_STAT:
+ case GF_FOP_FSTAT:
+ case GF_FOP_LOOKUP:
+ case GF_FOP_ACCESS:
+ case GF_FOP_READLINK:
+ case GF_FOP_OPENDIR:
+ case GF_FOP_STATFS:
+ case GF_FOP_READDIR:
+ case GF_FOP_READDIRP:
+ return "HIGH";
+
+ case GF_FOP_CREATE:
+ case GF_FOP_FLUSH:
+ case GF_FOP_LK:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_UNLINK:
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ case GF_FOP_RMDIR:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_RENAME:
+ case GF_FOP_LINK:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_GETXATTR:
+ case GF_FOP_FGETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ case GF_FOP_IPC:
+ return "NORMAL";
+
+ case GF_FOP_READ:
+ case GF_FOP_WRITE:
+ case GF_FOP_FSYNC:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FSYNCDIR:
+ case GF_FOP_XATTROP:
+ case GF_FOP_FXATTROP:
+ case GF_FOP_RCHECKSUM:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_FALLOCATE:
+ return "LOW";
+
+ case GF_FOP_NULL:
+ case GF_FOP_FORGET:
+ case GF_FOP_RELEASE:
+ case GF_FOP_RELEASEDIR:
+ case GF_FOP_GETSPEC:
+ case GF_FOP_MAXVALUE:
+ case GF_FOP_DISCARD:
+ return "LEAST";
+ }
+ return "UNKNOWN";
+}
+
+const char *
+fop_enum_to_string (glusterfs_fop_t fop)
+{
+ static const char *const str_map[] = {
+ "NULL",
+ "STAT",
+ "READLINK",
+ "MKNOD",
+ "MKDIR",
+ "UNLINK",
+ "RMDIR",
+ "SYMLINK",
+ "RENAME",
+ "LINK",
+ "TRUNCATE",
+ "OPEN",
+ "READ",
+ "WRITE",
+ "STATFS",
+ "FLUSH",
+ "FSYNC",
+ "SETXATTR",
+ "GETXATTR",
+ "REMOVEXATTR",
+ "OPENDIR",
+ "FSYNCDIR",
+ "ACCESS",
+ "CREATE",
+ "FTRUNCATE",
+ "FSTAT",
+ "LK",
+ "LOOKUP",
+ "READDIR",
+ "INODELK",
+ "FINODELK",
+ "ENTRYLK",
+ "FENTRYLK",
+ "XATTROP",
+ "FXATTROP",
+ "FGETXATTR",
+ "FSETXATTR",
+ "RCHECKSUM",
+ "SETATTR",
+ "FSETATTR",
+ "READDIRP",
+ "FORGET",
+ "RELEASE",
+ "RELEASEDIR",
+ "GETSPEC",
+ "FREMOVEXATTR",
+ "FALLOCATE",
+ "DISCARD",
+ "ZEROFILL",
+ "IPC",
+ "MAXVALUE"};
+ if (fop <= GF_FOP_MAXVALUE)
+ return str_map[fop];
+
+ return "UNKNOWNFOP";
+}
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
index 77a8cdd51c7..bf574fdabc7 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/common-utils.h
@@ -159,6 +159,27 @@ typedef struct dht_changelog_rename_info {
typedef int (*gf_cmp) (void *, void *);
+struct _dict;
+
+struct dnscache {
+ struct _dict *cache_dict;
+ time_t ttl;
+};
+
+struct dnscache_entry {
+ char *ip;
+ char *fqdn;
+ time_t timestamp;
+};
+
+
+struct dnscache *gf_dnscache_init (time_t ttl);
+struct dnscache_entry *gf_dnscache_entry_init ();
+void gf_dnscache_entry_deinit (struct dnscache_entry *entry);
+char *gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache);
+
+char *gf_resolve_path_parent (const char *path);
+
void gf_global_variable_init(void);
int32_t gf_resolve_ip6 (const char *hostname, uint16_t port, int family,
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 3bc76f6622a..4c7f9f517e3 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -258,6 +258,10 @@
/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
+/*
+ * OTHER NOTE: fop_enum_to_str and fop_enum_to_pri_str (in common-utils.h) also
+ * contain lists of fops, so if you update this list UPDATE THOSE TOO.
+ */
typedef enum {
GF_FOP_NULL = 0,
GF_FOP_STAT,
@@ -312,6 +316,8 @@ typedef enum {
GF_FOP_MAXVALUE,
} glusterfs_fop_t;
+const char *fop_enum_to_pri_string (glusterfs_fop_t fop);
+const char *fop_enum_to_string (glusterfs_fop_t fop);
typedef enum {
GF_MGMT_NULL = 0,
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
index 84949c61487..70c4ea770d5 100644
--- a/libglusterfs/src/mem-types.h
+++ b/libglusterfs/src/mem-types.h
@@ -128,6 +128,8 @@ enum gf_common_mem_types_ {
gf_common_mt_ereg,
gf_common_mt_wr,
gf_common_mt_rdma_arena_mr,
+ gf_common_mt_dnscache = 115,
+ gf_common_mt_dnscache_entry = 116,
gf_common_mt_parser_t,
gf_common_quota_meta_t,
/*related to gfdb library*/
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h
index 5c0655f2ead..43d943c62b6 100644
--- a/libglusterfs/src/stack.h
+++ b/libglusterfs/src/stack.h
@@ -96,6 +96,7 @@ struct _call_stack_t {
uid_t uid;
gid_t gid;
pid_t pid;
+ char identifier[UNIX_PATH_MAX];
uint16_t ngrps;
uint32_t groups_small[SMALL_GROUP_COUNT];
uint32_t *groups_large;
diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t
new file mode 100644
index 00000000000..cea8aa737c0
--- /dev/null
+++ b/tests/basic/fop-sampling.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+SAMPLE_FILE="$(gluster --print-logdir)/samples/glusterfs_${V0}.samp"
+
+function print_cnt() {
+ local FOP_TYPE=$1
+ local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMPLE_FILE} | wc -l)
+ echo $FOP_CNT
+}
+
+# Verify we got non-zero counts for stats/lookup/readdir
+check_samples() {
+ STAT_CNT=$(print_cnt STAT)
+ if [ "$STAT_CNT" -le "0" ]; then
+ echo "STAT count is zero"
+ return
+ fi
+
+ LOOKUP_CNT=$(print_cnt LOOKUP)
+ if [ "$LOOKUP_CNT" -le "0" ]; then
+ echo "LOOKUP count is zero"
+ return
+ fi
+
+ READDIR_CNT=$(print_cnt READDIR)
+ if [ "$READDIR_CNT" -le "0" ]; then
+ echo "READDIR count is zero"
+ return
+ fi
+
+ echo "OK"
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 diagnostics.latency-measurement on
+TEST $CLI volume set $V0 diagnostics.count-fop-hits on
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 2
+TEST $CLI volume set $V0 diagnostics.fop-sample-buf-size 65535
+TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1
+TEST $CLI volume set $V0 diagnostics.stats-dnscache-ttl-sec 3600
+
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+for i in {1..5}
+do
+ dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1
+done
+
+TEST ls -l $M0
+EXPECT_WITHIN 6 "OK" check_samples
+
+cleanup
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index d7a1055a571..9dde9373264 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -20,6 +20,8 @@ enum gf_io_stats_mem_types_ {
gf_io_stats_mt_ios_fd,
gf_io_stats_mt_ios_stat,
gf_io_stats_mt_ios_stat_list,
+ gf_io_stats_mt_ios_sample_buf,
+ gf_io_stats_mt_ios_sample,
gf_io_stats_mt_end
};
#endif
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 3157a65dae1..e73cae0a928 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -34,8 +34,12 @@
#include "logging.h"
#include "cli1-xdr.h"
#include "statedump.h"
+#include <pwd.h>
+#include <grp.h>
#define MAX_LIST_MEMBERS 100
+#define DEFAULT_PWD_BUF_SZ 16384
+#define DEFAULT_GRP_BUF_SZ 16384
typedef enum {
IOS_STATS_TYPE_NONE,
@@ -82,6 +86,25 @@ struct ios_stat_head {
struct ios_stat_list *iosstats;
};
+typedef struct _ios_sample_t {
+ uid_t uid;
+ gid_t gid;
+ char identifier[UNIX_PATH_MAX];
+ glusterfs_fop_t fop_type;
+ struct timeval timestamp;
+ double elapsed;
+} ios_sample_t;
+
+
+typedef struct _ios_sample_buf_t {
+ uint64_t pos; /* Position in write buffer */
+ uint64_t size; /* Size of ring buffer */
+ uint64_t collected; /* Number of samples we've collected */
+ uint64_t observed; /* Number of FOPs we've observed */
+ ios_sample_t *ios_samples; /* Our list of samples */
+} ios_sample_buf_t;
+
+
struct ios_lat {
double min;
double max;
@@ -102,7 +125,6 @@ struct ios_global_stats {
struct timeval max_openfd_time;
};
-
struct ios_conf {
gf_lock_t lock;
struct ios_global_stats cumulative;
@@ -116,6 +138,12 @@ struct ios_conf {
int32_t ios_dump_interval;
pthread_t dump_thread;
gf_boolean_t dump_thread_should_die;
+ gf_lock_t ios_sampling_lock;
+ int32_t ios_sample_interval;
+ int32_t ios_sample_buf_size;
+ ios_sample_buf_t *ios_sample_buf;
+ struct dnscache *dnscache;
+ int32_t ios_dnscache_ttl_sec;
};
@@ -133,7 +161,8 @@ typedef enum {
IOS_DUMP_TYPE_FILE = 1,
IOS_DUMP_TYPE_DICT = 2,
IOS_DUMP_TYPE_JSON_FILE = 3,
- IOS_DUMP_TYPE_MAX = 4
+ IOS_DUMP_TYPE_SAMPLES = 4,
+ IOS_DUMP_TYPE_MAX = 5
} ios_dump_type_t;
struct ios_dump_args {
@@ -163,6 +192,7 @@ is_fop_latency_started (call_frame_t *frame)
}
#ifdef GF_LINUX_HOST_OS
+const char *_IOS_SAMP_DIR = "/var/log/glusterfs/samples";
#define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats"
#else
#define _IOS_DUMP_DIR DATADIR "/db/glusterd/stats"
@@ -429,6 +459,72 @@ ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)
}
+/*
+ * So why goto all this trouble? Why not just queue up some samples in
+ * a big list and malloc away? Well malloc is expensive relative
+ * to what we are measuring, so cannot have any malloc's (or worse
+ * callocs) in our measurement code paths. Instead, we are going to
+ * pre-allocate a circular buffer and collect a maximum number of samples.
+ * Prior to dumping them all we'll create a new buffer and swap the
+ * old buffer with the new, and then proceed to dump the statistics
+ * in our dump thread.
+ *
+ */
+ios_sample_buf_t *
+ios_create_sample_buf (size_t buf_size)
+{
+ ios_sample_buf_t *ios_sample_buf = NULL;
+ ios_sample_t *ios_samples = NULL;
+
+ ios_sample_buf = GF_CALLOC (1,
+ sizeof (*ios_sample_buf),
+ gf_io_stats_mt_ios_sample_buf);
+ if (!ios_sample_buf)
+ goto err;
+
+ ios_samples = GF_CALLOC (buf_size,
+ sizeof (*ios_samples),
+ gf_io_stats_mt_ios_sample);
+
+ if (!ios_samples)
+ goto err;
+
+ ios_sample_buf->ios_samples = ios_samples;
+ ios_sample_buf->size = buf_size;
+ ios_sample_buf->pos = 0;
+ ios_sample_buf->observed = 0;
+ ios_sample_buf->collected = 0;
+
+ return ios_sample_buf;
+err:
+ GF_FREE (ios_sample_buf);
+ return NULL;
+}
+
+void
+ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf)
+{
+ GF_FREE (ios_sample_buf->ios_samples);
+ GF_FREE (ios_sample_buf);
+}
+
+static int
+ios_init_sample_buf (struct ios_conf *conf)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (conf);
+ LOCK (&conf->lock);
+ conf->ios_sample_buf = ios_create_sample_buf (
+ conf->ios_sample_buf_size);
+ if (!conf->ios_sample_buf)
+ goto out;
+ ret = 0;
+out:
+ UNLOCK (&conf->lock);
+ return ret;
+}
+
int
ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
struct ios_stat *iosstat)
@@ -815,6 +911,230 @@ out:
return ret;
}
+char *
+_resolve_username (xlator_t *this, uid_t uid)
+{
+ struct passwd pwd;
+ struct passwd *pwd_result = NULL;
+ size_t pwd_buf_len;
+ char *pwd_buf = NULL;
+ char *ret = NULL;
+
+ /* Prepare our buffer for the uid->username translation */
+#ifdef _SC_GETGR_R_SIZE_MAX
+ pwd_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX);
+#else
+ pwd_buf_len = -1;
+#endif
+ if (pwd_buf_len == -1) {
+ pwd_buf_len = DEFAULT_PWD_BUF_SZ; /* per the man page */
+ }
+
+ pwd_buf = alloca (pwd_buf_len);
+ if (!pwd_buf)
+ goto err;
+
+ getpwuid_r (uid, &pwd, pwd_buf, pwd_buf_len,
+ &pwd_result);
+ if (!pwd_result)
+ goto err;
+
+ ret = gf_strdup (pwd.pw_name);
+ if (ret)
+ return ret;
+ else
+ gf_log (this->name, GF_LOG_ERROR,
+ "gf_strdup failed, failing username "
+ "resolution.");
+err:
+ return ret;
+}
+
+char *
+_resolve_group_name (xlator_t *this, gid_t gid)
+{
+ struct group grp;
+ struct group *grp_result = NULL;
+ size_t grp_buf_len;
+ char *grp_buf = NULL;
+ char *ret = NULL;
+
+ /* Prepare our buffer for the gid->group name translation */
+#ifdef _SC_GETGR_R_SIZE_MAX
+ grp_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX);
+#else
+ grp_buf_len = -1;
+#endif
+ if (grp_buf_len == -1) {
+ grp_buf_len = DEFAULT_GRP_BUF_SZ; /* per the man page */
+ }
+
+ grp_buf = alloca (grp_buf_len);
+ if (!grp_buf) {
+ goto err;
+ }
+
+ getgrgid_r (gid, &grp, grp_buf, grp_buf_len,
+ &grp_result);
+ if (!grp_result)
+ goto err;
+
+ ret = gf_strdup (grp.gr_name);
+ if (ret)
+ return ret;
+ else
+ gf_log (this->name, GF_LOG_ERROR,
+ "gf_strdup failed, failing username "
+ "resolution.");
+err:
+ return ret;
+}
+
+
+/*
+ * This function writes out a latency sample to a given file descriptor
+ * and beautifies the output in the process.
+ */
+void
+_io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample,
+ FILE *logfp)
+{
+ double epoch_time = 0.00;
+ char *xlator_name = NULL;
+ char *instance_name = NULL;
+ char *hostname = NULL;
+ char *identifier = NULL;
+ char *port = NULL;
+ char *port_pos = NULL;
+ char *group_name = NULL;
+ char *username = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+
+ epoch_time = (sample->timestamp).tv_sec +
+ ((sample->timestamp).tv_usec / 1000000.0);
+
+ if (!sample->identifier || (strlen (sample->identifier) == 0)) {
+ hostname = "Unknown";
+ port = "Unknown";
+ } else {
+ identifier = strdupa (sample->identifier);
+ port_pos = strrchr (identifier, ':');
+ if (!port_pos || strlen(port_pos) < 2)
+ goto err;
+ port = strdupa (port_pos + 1);
+ if (!port)
+ goto err;
+ *port_pos = '\0';
+ hostname = gf_rev_dns_lookup_cached (identifier,
+ conf->dnscache);
+ if (!hostname)
+ hostname = "Unknown";
+ }
+
+ xlator_name = this->name;
+ if (!xlator_name || strlen (xlator_name) == 0)
+ xlator_name = "Unknown";
+
+ instance_name = this->instance_name;
+ if (!instance_name || strlen (instance_name) == 0)
+ instance_name = "N/A";
+
+ /* Resolve the UID to a string username */
+ username = _resolve_username (this, sample->uid);
+ if (!username) {
+ username = GF_MALLOC (30, gf_common_mt_char);
+ sprintf (username, "%d", (int32_t)sample->uid);
+ }
+
+ /* Resolve the GID to a string group name */
+ group_name = _resolve_group_name (this, sample->gid);
+ if (!group_name) {
+ group_name = GF_MALLOC (30, gf_common_mt_char);
+ sprintf (group_name, "%d", (int32_t)sample->gid);
+ }
+
+ ios_log (this, logfp,
+ "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s",
+ epoch_time, fop_enum_to_pri_string (sample->fop_type),
+ fop_enum_to_string (sample->fop_type),
+ sample->elapsed, xlator_name, instance_name, username,
+ group_name, hostname, port);
+ goto out;
+err:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error parsing socket identifier");
+out:
+ GF_FREE (group_name);
+ GF_FREE (username);
+}
+
+/*
+ * Takes our current sample buffer in conf->io_sample_buf, and saves
+ * a reference to this, init's a new buffer, and then dumps out the
+ * contents of the saved reference.
+ */
+int
+io_stats_dump_latency_samples_logfp (xlator_t *this, FILE *logfp)
+{
+ uint64_t i = 0;
+ struct ios_conf *conf = NULL;
+ ios_sample_buf_t *sample_buf = NULL;
+ int ret = 1; /* Default to error */
+
+ conf = this->private;
+
+ /* Save pointer to old buffer; the CS equivalent of
+ * Indiana Jones: https://www.youtube.com/watch?v=Pr-8AP0To4k,
+ * though ours will end better I hope!
+ */
+ sample_buf = conf->ios_sample_buf;
+ if (!sample_buf) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Sampling buffer is null, bailing!");
+ goto out;
+ }
+
+ /* Empty case, nothing to do, exit. */
+ if (sample_buf->collected == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No samples, dump not required.");
+ ret = 0;
+ goto out;
+ }
+
+ /* Init a new buffer, so we are free to work on the one we saved a
+ * reference to above.
+ */
+ if (ios_init_sample_buf (conf) != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to init new sampling buffer, out of memory?");
+ goto out;
+ }
+
+ /* Wrap-around case, dump from pos to sample_buf->size -1
+ * and then from 0 to sample_buf->pos (covered off by
+ * "simple case")
+ */
+ if (sample_buf->collected > sample_buf->pos + 1) {
+ for (i = sample_buf->pos; i < sample_buf->size; i++) {
+ _io_stats_write_latency_sample (this,
+ &(sample_buf->ios_samples[i]), logfp);
+ }
+ }
+
+ /* Simple case: Dump from 0 to sample_buf->pos */
+ for (i = 0; i < sample_buf->pos; i++) {
+ _io_stats_write_latency_sample (this,
+ &(sample_buf->ios_samples[i]), logfp);
+ }
+ ios_destroy_sample_buf (sample_buf);
+
+out:
+ return ret;
+}
+
int
io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
struct timeval *now, int interval, FILE* logfp)
@@ -1284,6 +1604,48 @@ io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)
return 0;
}
+void collect_ios_latency_sample (struct ios_conf *conf,
+ glusterfs_fop_t fop_type, double elapsed,
+ call_frame_t *frame)
+{
+ ios_sample_buf_t *ios_sample_buf = NULL;
+ ios_sample_t *ios_sample = NULL;
+ struct timeval *timestamp = NULL;
+ call_stack_t *root = NULL;
+
+
+ ios_sample_buf = conf->ios_sample_buf;
+ LOCK (&conf->ios_sampling_lock);
+ if (conf->ios_sample_interval == 0 ||
+ ios_sample_buf->observed % conf->ios_sample_interval != 0)
+ goto out;
+
+ timestamp = &frame->begin;
+ root = frame->root;
+
+ ios_sample = &(ios_sample_buf->ios_samples[ios_sample_buf->pos]);
+ ios_sample->elapsed = elapsed;
+ ios_sample->fop_type = fop_type;
+ ios_sample->uid = root->uid;
+ ios_sample->gid = root->gid;
+ (ios_sample->timestamp).tv_sec = timestamp->tv_sec;
+ (ios_sample->timestamp).tv_usec = timestamp->tv_usec;
+ memcpy (&ios_sample->identifier, &root->identifier,
+ sizeof (root->identifier));
+
+ /* We've reached the end of the circular buffer, start from the
+ * beginning. */
+ if (ios_sample_buf->pos == (ios_sample_buf->size - 1))
+ ios_sample_buf->pos = 0;
+ else
+ ios_sample_buf->pos++;
+ ios_sample_buf->collected++;
+out:
+ ios_sample_buf->observed++;
+ UNLOCK (&conf->ios_sampling_lock);
+ return;
+}
+
static void
update_ios_latency_stats (struct ios_global_stats *stats, double elapsed,
glusterfs_fop_t op)
@@ -1321,6 +1683,7 @@ update_ios_latency (struct ios_conf *conf, call_frame_t *frame,
update_ios_latency_stats (&conf->cumulative, elapsed, op);
update_ios_latency_stats (&conf->incremental, elapsed, op);
+ collect_ios_latency_sample (conf, op, elapsed, frame);
return 0;
}
@@ -1660,7 +2023,7 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (iosstat) {
BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP);
- iosstat = NULL;
+ iosstat = NULL;
}
STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
@@ -2512,14 +2875,18 @@ _ios_destroy_dump_thread (struct ios_conf *conf) {
void *
_ios_dump_thread (xlator_t *this) {
struct ios_conf *conf = NULL;
- FILE *logfp = NULL;
+ FILE *stats_logfp = NULL;
+ FILE *samples_logfp = NULL;
struct ios_dump_args args = {0};
int i;
- int bytes_written = 0;
- char filename[PATH_MAX];
+ int stats_bytes_written = 0;
+ int samples_bytes_written = 0;
+ char stats_filename[PATH_MAX];
+ char samples_filename[PATH_MAX];
char *xlator_name;
char *instance_name;
- gf_boolean_t log_fopen_failure = _gf_true;
+ gf_boolean_t log_stats_fopen_failure = _gf_true;
+ gf_boolean_t log_samples_fopen_failure = _gf_true;
int old_cancel_type;
conf = this->private;
@@ -2546,17 +2913,34 @@ _ios_dump_thread (xlator_t *this) {
goto out;
}
}
+ if (mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) {
+ if (errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create stats-sample directory %s",
+ _IOS_SAMP_DIR);
+ goto out;
+ }
+ }
if (instance_name) {
- bytes_written = snprintf (filename, PATH_MAX,
+ stats_bytes_written = snprintf (stats_filename, PATH_MAX,
"%s/%s_%s_%s.dump", _IOS_DUMP_DIR,
__progname, xlator_name, instance_name);
+ samples_bytes_written = snprintf (samples_filename, PATH_MAX,
+ "%s/%s_%s_%s.samp", _IOS_SAMP_DIR,
+ __progname, xlator_name, instance_name);
} else {
- bytes_written = snprintf (filename, PATH_MAX, "%s/%s_%s.dump",
- _IOS_DUMP_DIR, __progname, xlator_name);
- }
- if (bytes_written >= PATH_MAX) {
+ stats_bytes_written = snprintf (stats_filename, PATH_MAX,
+ "%s/%s_%s.dump", _IOS_DUMP_DIR, __progname,
+ xlator_name);
+ samples_bytes_written = snprintf (samples_filename, PATH_MAX,
+ "%s/%s_%s.samp", _IOS_SAMP_DIR, __progname,
+ xlator_name);
+ }
+ if ((stats_bytes_written >= PATH_MAX) ||
+ (samples_bytes_written >= PATH_MAX)) {
gf_log (this->name, GF_LOG_ERROR,
- "Invalid path for IO Stats dump: %s", filename);
+ "Invalid path for stats dump (%s) and/or latency "
+ "samples (%s)", stats_filename, samples_filename);
goto out;
}
while (1) {
@@ -2572,22 +2956,32 @@ _ios_dump_thread (xlator_t *this) {
* just hold it open and rewind/truncate on each iteration.
* Leaving it alone for now.
*/
- logfp = fopen (filename, "w+");
- if (!logfp) {
- if (log_fopen_failure) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not open stats-dump file %s",
- filename);
- log_fopen_failure = _gf_false;
- }
- continue;
+ stats_logfp = fopen (stats_filename, "w+");
+ if (stats_logfp) {
+ (void) ios_dump_args_init (&args,
+ IOS_DUMP_TYPE_JSON_FILE,
+ stats_logfp);
+ io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false);
+ fclose (stats_logfp);
+ log_stats_fopen_failure = _gf_true;
+ } else if (log_stats_fopen_failure) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not open stats-dump file %s (%s)",
+ stats_filename, strerror(errno));
+ log_stats_fopen_failure = _gf_false;
+ }
+ samples_logfp = fopen (samples_filename, "w+");
+ if (samples_logfp) {
+ io_stats_dump_latency_samples_logfp (this,
+ samples_logfp);
+ fclose (samples_logfp);
+ log_samples_fopen_failure = _gf_true;
+ } else if (log_samples_fopen_failure) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not open samples-dump file %s (%s)",
+ samples_filename, strerror(errno));
+ log_samples_fopen_failure = _gf_false;
}
- (void) ios_dump_args_init (
- &args, IOS_DUMP_TYPE_JSON_FILE,
- logfp);
- io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false);
- fclose (logfp);
- log_fopen_failure = _gf_true;
}
out:
gf_log (this->name, GF_LOG_INFO, "IO stats dump thread terminated");
@@ -3116,6 +3510,10 @@ reconfigure (xlator_t *this, dict_t *options)
(void *) &_ios_dump_thread, this);
}
+ GF_OPTION_RECONF ("ios-sample-interval", conf->ios_sample_interval,
+ options, int32, out);
+ GF_OPTION_RECONF ("ios-sample-buf-size", conf->ios_sample_buf_size,
+ options, int32, out);
GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);
if (sys_log_str) {
sys_log_level = glusterd_check_log_level (sys_log_str);
@@ -3229,6 +3627,7 @@ init (xlator_t *this)
* in case of error paths.
*/
LOCK_INIT (&conf->lock);
+ LOCK_INIT (&conf->ios_sampling_lock);
gettimeofday (&conf->cumulative.started_at, NULL);
gettimeofday (&conf->incremental.started_at, NULL);
@@ -3247,6 +3646,22 @@ init (xlator_t *this)
GF_OPTION_INIT ("ios-dump-interval", conf->ios_dump_interval,
int32, out);
+ GF_OPTION_INIT ("ios-sample-interval", conf->ios_sample_interval,
+ int32, out);
+
+ GF_OPTION_INIT ("ios-sample-buf-size", conf->ios_sample_buf_size,
+ int32, out);
+
+ if (ios_init_sample_buf (conf) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ return -1;
+ }
+
+ GF_OPTION_INIT ("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec,
+ int32, out);
+ conf->dnscache = gf_dnscache_init (conf->ios_dnscache_ttl_sec);
+
GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);
if (sys_log_str) {
sys_log_level = glusterd_check_log_level (sys_log_str);
@@ -3497,6 +3912,29 @@ struct volume_options options[] = {
.description = "Interval (in seconds) at which to auto-dump "
"statistics. Zero disables automatic dumping."
},
+ { .key = { "ios-sample-interval" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 65535,
+ .default_value = "0",
+ .description = "Interval in which we want to collect FOP latency "
+ "samples. 2 means collect a sample every 2nd FOP."
+ },
+ { .key = { "ios-sample-buf-size" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1024,
+ .max = 1024*1024,
+ .default_value = "65535",
+ .description = "The maximum size of our FOP sampling ring buffer."
+ },
+ { .key = { "ios-dnscache-ttl-sec" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 3600 * 72,
+ .default_value = "86400",
+ .description = "The interval after wish a cached DNS entry will be "
+ "re-validated. Default: 24 hrs"
+ },
{ .key = { "latency-measurement" },
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 4ea7d3baa8a..ccf4b380ffb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -849,6 +849,21 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.option = "ios-dump-interval",
.op_version = 1
},
+ { .key = "diagnostics.fop-sample-interval",
+ .voltype = "debug/io-stats",
+ .option = "ios-sample-interval",
+ .op_version = 1
+ },
+ { .key = "diagnostics.fop-sample-buf-size",
+ .voltype = "debug/io-stats",
+ .option = "ios-sample-buf-size",
+ .op_version = 1
+ },
+ { .key = "diagnostics.stats-dnscache-ttl-sec",
+ .voltype = "debug/io-stats",
+ .option = "ios-dnscache-ttl-sec",
+ .op_version = 1
+ },
/* IO-cache xlator options */
{ .key = "performance.cache-max-file-size",
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c
index 819ae98b081..52fdd9796c3 100644
--- a/xlators/nfs/server/src/nfs-fops.c
+++ b/xlators/nfs/server/src/nfs-fops.c
@@ -219,6 +219,7 @@ nfs_create_frame (xlator_t *xl, nfs_user_t *nfu)
frame->root->pid = NFS_PID;
frame->root->uid = nfu->uid;
frame->root->gid = nfu->gids[NFS_PRIMGID_IDX];
+ memcpy (&frame->root->identifier, &nfu->identifier, UNIX_PATH_MAX);
frame->root->lk_owner = nfu->lk_owner;
if (nfu->ngrps != 1) {
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index 4dda35c49ef..116854b9109 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -639,8 +639,8 @@ nfs_user_root_create (nfs_user_t *newnfu)
int
-nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
- int auxcount)
+nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid,
+ rpc_transport_t *trans, gid_t *auxgids, int auxcount)
{
int x = 1;
int y = 0;
@@ -655,6 +655,10 @@ nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
newnfu->uid = uid;
newnfu->gids[0] = gid;
newnfu->ngrps = 1;
+ if (trans) {
+ memcpy (&newnfu->identifier, trans->peerinfo.identifier,
+ UNIX_PATH_MAX);
+ }
gf_msg_trace (GF_NFS, 0, "uid: %d, gid %d, gids: %d", uid, gid,
auxcount);
@@ -683,7 +687,9 @@ nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req)
gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
nfs_user_create (nfu, rpcsvc_request_uid (req),
- rpcsvc_request_gid (req), gidarr, gids);
+ rpcsvc_request_gid (req),
+ rpcsvc_request_transport (req),
+ gidarr, gids);
return;
}
@@ -699,7 +705,8 @@ nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,
return;
gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
- nfs_user_create (nfu, uid, gid, gidarr, gids);
+ nfs_user_create (nfu, uid, gid, rpcsvc_request_transport (req),
+ gidarr, gids);
return;
}
diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h
index 107140bc720..82df163d494 100644
--- a/xlators/nfs/server/src/nfs.h
+++ b/xlators/nfs/server/src/nfs.h
@@ -125,14 +125,15 @@ typedef struct nfs_user_info {
gid_t gids[NFS_NGROUPS];
int ngrps;
gf_lkowner_t lk_owner;
+ char identifier[UNIX_PATH_MAX]; /* ip of user */
} nfs_user_t;
extern int
nfs_user_root_create (nfs_user_t *newnfu);
extern int
-nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
- int auxcount);
+nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid,
+ rpc_transport_t *trans, gid_t *auxgids, int auxcount);
extern void
nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req);
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
index bcd1fed8b84..b369e835b24 100644
--- a/xlators/protocol/server/src/server-helpers.c
+++ b/xlators/protocol/server/src/server-helpers.c
@@ -423,6 +423,7 @@ get_frame_from_request (rpcsvc_request_t *req)
server_conf_t *priv = NULL;
clienttable_t *clienttable = NULL;
unsigned int i = 0;
+ rpc_transport_t *trans = NULL;
GF_VALIDATE_OR_GOTO ("server", req, out);
@@ -500,6 +501,12 @@ get_frame_from_request (rpcsvc_request_t *req)
server_resolve_groups (frame, req);
else
server_decode_groups (frame, req);
+ trans = req->trans;
+ if (trans) {
+ memcpy (&frame->root->identifier, trans->peerinfo.identifier,
+ sizeof (trans->peerinfo.identifier));
+ }
+
frame->local = req;
out: