io-stats: Expose io-thread queue depths

Summary:
- This diff exposes the io-thread queue depths by sending a specialized getxattr() call down to the io-threads translator.
- Port of D3086477, D3094145, D3095505 to 3.8.

Test Plan: Tested on devserver; will run prove tests. Valgrind and ASAN pass as well.

Reviewers: rwareing, kvigor
Subscribers: dld, moox, dph
Differential Revision: https://phabricator.fb.com/D3086477
Change-Id: Ia452a4fcdb9173a751c4cb48d739b25c235f6855
Reviewed-on: https://review.gluster.org/18143
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shreyas Siravara <sshreyas@fb.com>
author: Shreyas Siravara <sshreyas@fb.com> 2016-03-22 21:04:35 -0700
committer: Shreyas Siravara <sshreyas@fb.com> 2017-08-30 03:10:20 +0000
commit: 69509ee7d270302c232556b5c941fb6a22b4dced (patch)
tree: fc79f03525c7c1037079deb173acb2c8bc789702
parent: 9f9da37e3afa1f9394fb5edf49334ef9d6a6dd00 (diff)
9 files changed, 126 insertions, 10 deletions
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
index 1f6c1a0eae9..5259c6befa1 100644
--- a/libglusterfs/src/dict.h
+++ b/libglusterfs/src/dict.h
@@ -159,6 +159,8 @@ data_t * data_copy (data_t *old);
 dict_t *get_new_dict_full (uint32_t size_hint);
 dict_t *get_new_dict ();
 
+/* Walk cursor c (a data_pair_t *) over every pair in d->members_list. */
+#define dict_for_each(d, c) for ((c) = (d)->members_list; (c); (c) = (c)->next)
 int dict_foreach (dict_t *this,
                   int (*fn)(dict_t *this,
                             char *key,
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 399d695665b..59f3df19420 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -78,6 +78,7 @@
 #define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
 #define ZR_DUMP_FUSE            "dump-fuse"
 #define ZR_FUSE_MOUNTOPTS       "fuse-mountopts"
+#define IO_THREADS_QUEUE_SIZE_KEY "io-thread-queue-size"
 
 #define GF_XATTR_CLRLK_CMD      "glusterfs.clrlk"
 #define GF_XATTR_PATHINFO_KEY   "trusted.glusterfs.pathinfo"
@@ -283,6 +284,51 @@
 #define GF_LK_ADVISORY 0
 #define GF_LK_MANDATORY 1
 
+/* Jump to lbl with errval = -EINVAL when key is non-NULL and equals cmpkey. */
+#define GF_CHECK_XATTR_KEY_AND_GOTO(key, cmpkey, errval, lbl)   \
+        do {                                                    \
+                if ((key) && strcmp ((key), (cmpkey)) == 0) {   \
+                        (errval) = -EINVAL;                     \
+                        goto lbl;                               \
+                }                                               \
+        } while (0)
+
+typedef enum {                          /* fop priority levels */
+        GF_FOP_PRI_UNSPEC = -1,         /* priority not specified */
+        GF_FOP_PRI_HI = 0,              /* low latency */
+        GF_FOP_PRI_NORMAL,              /* normal */
+        GF_FOP_PRI_LO,                  /* bulk */
+        GF_FOP_PRI_LEAST,               /* least */
+        GF_FOP_PRI_MAX,                 /* sentinel: count, not a priority */
+} gf_fop_pri_t;
+
+/* Legacy io-threads spellings, kept as aliases for backwards compatibility */
+typedef gf_fop_pri_t iot_pri_t;
+#define IOT_PRI_UNSPEC  GF_FOP_PRI_UNSPEC
+#define IOT_PRI_HI      GF_FOP_PRI_HI
+#define IOT_PRI_NORMAL  GF_FOP_PRI_NORMAL
+#define IOT_PRI_LO      GF_FOP_PRI_LO
+#define IOT_PRI_LEAST   GF_FOP_PRI_LEAST
+#define IOT_PRI_MAX     GF_FOP_PRI_MAX
+
+/* Display names for the real priorities, indexed by gf_fop_pri_t value. */
+static const char *const FOP_PRI_STRINGS[GF_FOP_PRI_MAX] = {
+        [GF_FOP_PRI_HI]     = "HIGH",
+        [GF_FOP_PRI_NORMAL] = "NORMAL",
+        [GF_FOP_PRI_LO]     = "LOW",
+        [GF_FOP_PRI_LEAST]  = "LEAST",
+};
+/* Map a fop priority to a printable name; the result is never NULL. */
+static inline const char *fop_pri_to_string (gf_fop_pri_t pri)
+{
+        if (pri < 0)
+                return "UNSPEC";        /* e.g. GF_FOP_PRI_UNSPEC */
+
+        /* GF_FOP_PRI_MAX and anything above it has no table entry. */
+        return (pri < GF_FOP_PRI_MAX) ? FOP_PRI_STRINGS[pri]
+                                      : "INVALID";
+}
+
 const char *fop_enum_to_pri_string (glusterfs_fop_t fop);
 const char *fop_enum_to_string (glusterfs_fop_t fop);
 
diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t
index 7da6e0605a4..2840498218b 100644
--- a/tests/basic/stats-dump.t
+++ b/tests/basic/stats-dump.t
@@ -12,6 +12,7 @@ TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
 TEST $CLI volume set $V0 diagnostics.latency-measurement on
 TEST $CLI volume set $V0 diagnostics.count-fop-hits on
 TEST $CLI volume set $V0 diagnostics.stats-dump-interval 1
+TEST $CLI volume set $V0 performance.nfs.io-threads on 
 TEST $CLI volume set $V0 nfs.disable off
 TEST $CLI volume start $V0
 EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
@@ -36,6 +37,10 @@ NFSD_RET="$?"
 FUSE_OUTPUT="$(grep 'aggr.fop.write.count": "0"'  ${GLUSTERD_WORKDIR}/stats/glusterfs_patchy.dump)"
 FUSE_RET="$?"
 
+# Test that io-stats is getting queue sizes from io-threads
+TEST grep 'queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump
+TEST grep 'queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy?.dump
+
 TEST [ 0 -ne "$BRICK_RET" ]
 TEST [ 0 -ne "$NFSD_RET" ]
 TEST [ 0 -ne "$FUSE_RET" ]
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 2b369ca3c68..a917bc08ae0 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1538,6 +1538,15 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
                 return 0;
 
         /*
+         * Heal daemons have no io-threads translator, so this getxattr would
+         * be sent down and eventually crash them; reject the key here instead.
+         */
+        if (strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) {
+                ret = -EINVAL;
+                goto out;
+        }
+
+        /*
          * Special xattrs which need responses from all subvols
          */
         if (afr_is_special_xattr (name, &cbk, 0)) {
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 45632b04845..c120dffbf23 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -3464,6 +3464,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
         int           cnt           = 0;
         char         *node_uuid_key = NULL;
         int           ret           = -1;
+
+        GF_CHECK_XATTR_KEY_AND_GOTO (key, IO_THREADS_QUEUE_SIZE_KEY, op_errno, err);
         VALIDATE_OR_GOTO (frame, err);
         VALIDATE_OR_GOTO (this, err);
         VALIDATE_OR_GOTO (loc, err);
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index b02819f1013..0b5c095c3b4 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -35,6 +35,7 @@
 #include "logging.h"
 #include "cli1-xdr.h"
 #include "statedump.h"
+#include "syncop.h"
 #include <pwd.h>
 #include <grp.h>
 
@@ -858,6 +859,7 @@ io_stats_dump_global_to_json_logfp (xlator_t *this,
         float                 fop_lat_min;
         float                 fop_lat_max;
         double                interval_sec;
+        loc_t                 unused_loc = {0, };
 
         interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) -
                 (stats->started_at.tv_sec * 1000000.0 +
@@ -962,6 +964,29 @@ io_stats_dump_global_to_json_logfp (xlator_t *this,
                         "\"%s.%s.fop.%s.latency_max_usec\": \"%0.2lf\",",
                         key_prefix, str_prefix, lc_fop_name, fop_lat_max);
         }
+
+        dict_t *xattr = NULL;
+        ret = syncop_getxattr (this, &unused_loc, &xattr,
+                               IO_THREADS_QUEUE_SIZE_KEY, NULL, NULL);
+        if (xattr) {
+                // Iterate over the dictionary returned to us by io-threads and
+                // dump the results to the stats file.
+                data_pair_t *curr = NULL;
+                dict_for_each (xattr, curr) {
+                        ios_log (this, logfp,
+                                  "\"%s.%s.%s.queue_size\": \"%d\",",
+                                  key_prefix, str_prefix, curr->key,
+                                  data_to_int32 (curr->value));
+                }
+
+                // Free the dictionary
+                dict_unref (xattr);
+        } else {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Unable to get queue size counts from "
+                        "the io-threads translator!");
+        }
+
         if (interval == -1) {
                 ios_log (this, logfp, "\"%s.%s.uptime\": \"%"PRId64"\",",
                          key_prefix, str_prefix,
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index fdc9f46284a..1f087b43ab4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -4554,8 +4554,12 @@ volgen_get_shd_key (int type)
 static gf_boolean_t
 volgen_is_shd_compatible_xl (char *xl_type)
 {
-        char            *shd_xls[] = {"cluster/replicate", "cluster/disperse",
-                                      NULL};
+        char            *shd_xls[] = {
+                "cluster/replicate",
+                "cluster/disperse",
+                "debug/io-stats",
+                 NULL
+        };
         if (gf_get_index_by_elem (shd_xls, xl_type) != -1)
                 return _gf_true;
 
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 06ad0c2aae5..7f9dc5f82a8 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -278,6 +278,9 @@ iot_get_pri_meaning (iot_pri_t pri)
         case IOT_PRI_MAX:
                 name = "invalid";
                 break;
+        case IOT_PRI_UNSPEC:
+                name = "unspecified";
+                break;
         }
         return name;
 }
@@ -610,6 +613,34 @@ int
 iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
               const char *name, dict_t *xdata)
 {
+        iot_conf_t *conf     = this->private;
+        dict_t     *depths   = NULL;
+        int         op_ret   = 0;
+        int         op_errno = 0;
+        int         i        = 0;
+
+        /* Virtual xattr: report per-priority queue depths directly. */
+        if (conf && name && strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) {
+                /* Deliberately no reference taken on this dict here. */
+                depths = get_new_dict ();
+                for (i = 0; depths && i < IOT_PRI_MAX; i++) {
+                        if (dict_set_int32 (depths,
+                                            (char *)fop_pri_to_string (i),
+                                            conf->queue_sizes[i]) != 0) {
+                                dict_destroy (depths);
+                                depths = NULL;
+                        }
+                }
+                if (!depths) {
+                        /* Fail the call instead of "success" + NULL dict. */
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                }
+                STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+                                     depths, xdata);
+                return 0;
+        }
+
         IOT_FOP (getxattr, frame, this, loc, name, xdata);
         return 0;
 }
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index 673e1967617..011d4a00f7f 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -42,14 +42,6 @@ struct iot_conf;
 #define IOT_THREAD_STACK_SIZE   ((size_t)(1024*1024))
 
 
-typedef enum {
-        IOT_PRI_HI = 0, /* low latency */
-        IOT_PRI_NORMAL, /* normal */
-        IOT_PRI_LO,     /* bulk */
-        IOT_PRI_LEAST,  /* least */
-        IOT_PRI_MAX,
-} iot_pri_t;
-
 #define IOT_LEAST_THROTTLE_DELAY 1	/* sample interval in seconds */
 struct iot_least_throttle {
 	struct timeval	sample_time;	/* timestamp of current sample */
author	Shreyas Siravara <sshreyas@fb.com>	2016-03-22 21:04:35 -0700
committer	Shreyas Siravara <sshreyas@fb.com>	2017-08-30 03:10:20 +0000
commit	69509ee7d270302c232556b5c941fb6a22b4dced (patch)
tree	fc79f03525c7c1037079deb173acb2c8bc789702
parent	9f9da37e3afa1f9394fb5edf49334ef9d6a6dd00 (diff)