diff options
author | Shreyas Siravara <sshreyas@fb.com> | 2017-09-03 09:18:10 -0700 |
---|---|---|
committer | Shreyas Siravara <sshreyas@fb.com> | 2017-09-03 17:28:55 +0000 |
commit | 87f6e9f034fb6560161f419b5b3e22631f802ace (patch) | |
tree | abdf9a61f21957833b411b4bc6607c88fc2ffddc | |
parent | 51f797ce900527d8441269f3ebdc5456cfc299e0 (diff) |
auditing: Sample creation & removal of filesystem entries as well as errors
Summary:
- Adds the ability for gluster to log every single CREATE and UNLINK that happens on the bricks (right before invoking sys_unlink() or open(... | O_CREAT))
- Makes it so that CREATEs and UNLINKs are not downsampled in io-stats
- This is a port of D3268156, D3778968, D3903894 & D3301527 to 3.8
Reviewed By: kvigor
Change-Id: I1bce28068c02b7d202f094094237646b4d39794b
Reviewed-on: https://review.gluster.org/18198
Reviewed-by: Shreyas Siravara <sshreyas@fb.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r-- | tests/basic/fop-sampling.t | 87 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats.c | 237 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 |
3 files changed, 312 insertions, 18 deletions
diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t index d22826c4224..e429fd8cb07 100644 --- a/tests/basic/fop-sampling.t +++ b/tests/basic/fop-sampling.t @@ -7,6 +7,7 @@ BRICK_SAMPLES="$(gluster --print-logdir)/samples/glusterfsd__d_backends_${V0}0.samp" NFS_SAMPLES="$(gluster --print-logdir)/samples/glusterfs_nfsd_${V0}.samp" +FUSE_SAMPLES="/var/log/glusterfs/samples/glusterfs_${V0}.samp" function check_path { op=$1 @@ -22,25 +23,26 @@ function check_path { function print_cnt() { local FOP_TYPE=$1 - local FOP_CNT=$(grep ,${FOP_TYPE} ${BRICK_SAMPLES} | wc -l) + local SAMP_FILE=$2 + local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMP_FILE} | wc -l) echo $FOP_CNT } # Verify we got non-zero counts for stats/lookup/readdir check_samples() { - STAT_CNT=$(print_cnt STAT) + STAT_CNT=$(print_cnt STAT $BRICK_SAMPLES) if [ "$STAT_CNT" -le "0" ]; then echo "STAT count is zero" return fi - LOOKUP_CNT=$(print_cnt LOOKUP) + LOOKUP_CNT=$(print_cnt LOOKUP $BRICK_SAMPLES) if [ "$LOOKUP_CNT" -le "0" ]; then echo "LOOKUP count is zero" return fi - READDIR_CNT=$(print_cnt READDIR) + READDIR_CNT=$(print_cnt READDIR $BRICK_SAMPLES) if [ "$READDIR_CNT" -le "0" ]; then echo "READDIR count is zero" return @@ -64,20 +66,22 @@ TEST $CLI volume start $V0 >${NFS_SAMPLES} >${BRICK_SAMPLES} +>${FUSE_SAMPLES} ################# # Basic Samples # ################# + TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 for i in {1..5} do dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1 + rm ${M0}/testfile$i done TEST ls -l $M0 EXPECT_WITHIN 6 "OK" check_samples - sleep 2 ################################ @@ -86,17 +90,21 @@ sleep 2 TEST mount_nfs $H0:$V0 $N0 -ls $N0 &> /dev/null -touch $N0/file1 -stat $N0/file1 &> /dev/null -echo "some data" > $N0/file1 -dd if=/dev/zero of=$N0/file2 bs=1M count=10 conv=fsync -dd if=/dev/zero of=$N0/file1 bs=1M count=1 -cat $N0/file2 &> /dev/null -mkdir -p $N0/dir1 -rmdir $N0/dir1 -rm $N0/file1 -rm $N0/file2 
+>$FUSE_SAMPLES + +for dir in "$N0" "$M0"; do + ls $dir &> /dev/null + touch $dir/file1 + stat $dir/file1 &> /dev/null + echo "some data" > $dir/file1 + dd if=/dev/zero of=$dir/file2 bs=1M count=10 conv=fsync + dd if=/dev/zero of=$dir/file1 bs=1M count=1 + cat $dir/file2 &> /dev/null + mkdir -p $dir/dir1 + rmdir $dir/dir1 + rm $dir/file1 + rm $dir/file2 +done; EXPECT_WITHIN 10 "Y" check_path CREATE /file1 $BRICK_SAMPLES EXPECT_WITHIN 10 "Y" check_path LOOKUP /file1 $BRICK_SAMPLES @@ -126,4 +134,51 @@ EXPECT_WITHIN 10 "Y" check_path RMDIR /dir1 $NFS_SAMPLES EXPECT_WITHIN 10 "Y" check_path UNLINK /file1 $NFS_SAMPLES EXPECT_WITHIN 10 "Y" check_path UNLINK /file2 $NFS_SAMPLES + +EXPECT_WITHIN 10 "Y" check_path CREATE /file1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path LOOKUP /file1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path OPEN /file1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path SETATTR /file1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path WRITE /file1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path FLUSH /file2 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path FSYNC /file2 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path OPEN /file2 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path READ /file2 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path TRUNCATE /file1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path MKDIR /dir1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path RMDIR /dir1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path UNLINK /file1 $FUSE_SAMPLES +EXPECT_WITHIN 10 "Y" check_path UNLINK /file2 $FUSE_SAMPLES + +###################### +# Errors in samples # +##################### + +# With a very low sample rate, we should still audit creates & unlinks 1:1 +TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1000 +TEST $CLI volume set $V0 diagnostics.fop-sample-enable-audit on + +>${NFS_SAMPLES} +>${BRICK_SAMPLES} +>${FUSE_SAMPLES} + +mkdir -pv $M0/1/2/3/4 +touch $M0/1/2/3/4/{a,b,c} +dd if=/dev/zero of=$M0/1/2/3/4/d bs=1k count=10 +dd if=/dev/zero of=$M0/1/2/3/4/d 
bs=1M count=10 +rm -rfv $M0/* +sleep 6 + +TEST grep "MKDIR.*/1/2/3/4" $FUSE_SAMPLES +TEST grep "CREATE.*/1/2/3/4" $FUSE_SAMPLES +TEST grep "RMDIR.*/1/2" $FUSE_SAMPLES +TEST grep "UNLINK.*/1/2/3/4/a" $FUSE_SAMPLES +TEST grep "TRUNCATE.*/1/2/3/4/d" $FUSE_SAMPLES + +TEST [ $(print_cnt MKDIR $FUSE_SAMPLES) -eq "4" ] +TEST [ $(print_cnt CREATE $FUSE_SAMPLES) -eq "4" ] +TEST [ $(print_cnt RMDIR $FUSE_SAMPLES) -eq "4" ] +TEST [ $(print_cnt UNLINK $FUSE_SAMPLES) -eq "4" ] +TEST [ $(print_cnt TRUNCATE $FUSE_SAMPLES) -eq "1" ] + cleanup; diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index d25d5be29ea..101803f470d 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -42,6 +42,7 @@ #define MAX_LIST_MEMBERS 100 #define DEFAULT_PWD_BUF_SZ 16384 #define DEFAULT_GRP_BUF_SZ 16384 +#define IOS_MAX_ERRORS 132 typedef enum { IOS_STATS_TYPE_NONE, @@ -131,6 +132,154 @@ struct ios_global_stats { struct timeval max_openfd_time; }; +/* This is a list of errors which are in some way critical. + * It is useful to sample these errors even if other errors + * should be ignored. 
*/ +const int32_t ios_hard_error_list[] = { + EIO, + EROFS, + ENOSPC, + ENOTCONN, + ESTALE, +}; + +#define IOS_HARD_ERROR_LIST_SIZE (sizeof(ios_hard_error_list) / sizeof(int32_t)) + +const char *errno_to_name[IOS_MAX_ERRORS] = { + "success", /* 0 */ + "eperm", + "enoent", + "esrch", + "eintr", + "eio", + "enxio", + "e2big", + "enoexec", + "ebadf", + "echild", + "eagain", + "enomem", + "eacces", + "efault", + "enotblk", + "ebusy", + "eexist", + "exdev", + "enodev", + "enotdir", + "eisdir", + "einval", + "enfile", + "emfile", + "enotty", + "etxtbsy", + "efbig", + "enospc", + "espipe", + "erofs", + "emlink", + "epipe", + "edom", + "erange", + "edeadlk", + "enametoolong", + "enolck", + "enosys", + "enotempty", + "eloop", + "ewouldblock", + "enomsg", + "eidrm", + "echrng", + "el2nsync", + "el3hlt", + "el3rst", + "elnrng", + "eunatch", + "enocsi", + "el2hlt", + "ebade", + "ebadr", + "exfull", + "enoano", + "ebadrqc", + "ebadslt", + "edeadlock", + "ebfont", + "enostr", + "enodata", + "etime", + "enosr", + "enonet", + "enopkg", + "eremote", + "enolink", + "eadv", + "esrmnt", + "ecomm", + "eproto", + "emultihop", + "edotdot", + "ebadmsg", + "eoverflow", + "enotuniq", + "ebadfd", + "eremchg", + "elibacc", + "elibbad", + "elibscn", + "elibmax", + "elibexec", + "eilseq", + "erestart", + "estrpipe", + "eusers", + "enotsock", + "edestaddrreq", + "emsgsize", + "eprototype", + "enoprotoopt", + "eprotonosupport", + "esocktnosupport", + "eopnotsupp", + "epfnosupport", + "eafnosupport", + "eaddrinuse", + "eaddrnotavail", + "enetdown", + "enetunreach", + "enetreset", + "econnaborted", + "econnreset", + "enobufs", + "eisconn", + "enotconn", + "eshutdown", + "etoomanyrefs", + "etimedout", + "econnrefused", + "ehostdown", + "ehostunreach", + "ealready", + "einprogress", + "estale", + "euclean", + "enotnam", + "enavail", + "eisnam", + "eremoteio", + "edquot", + "enomedium", + "emediumtype", + "ecanceled", + "enokey", + "ekeyexpired", + "ekeyrevoked", + "ekeyrejected", + "eownerdead", + 
"enotrecoverable" +}; + struct ios_conf { gf_lock_t lock; struct ios_global_stats cumulative; @@ -153,6 +302,9 @@ struct ios_conf { gf_boolean_t iamshd; gf_boolean_t iamnfsd; gf_boolean_t iamgfproxyd; + gf_boolean_t audit_creates_and_unlinks; + gf_boolean_t sample_hard_errors; + gf_boolean_t sample_all_errors; }; @@ -1887,6 +2039,52 @@ out: return; } +gf_boolean_t +_should_sample (struct ios_conf *conf, glusterfs_fop_t fop_type, + ios_sample_buf_t* ios_sample_buf, int32_t op_ret, + int32_t op_errno) +{ + int i; + + /* If sampling is disabled, return false */ + if (conf->ios_sample_interval == 0) + return _gf_false; + + /* Sometimes it's useful to sample errors. If `fop-sample-all-errors` + * is active, then we should sample ALL errors. */ + if (op_ret < 0 && op_errno != 0 && conf->sample_all_errors) { + return _gf_true; + } + + /* If `fop-sample-hard-errors` is active, we only look through a small + * subset of errno values to sample, those which are critical to Gluster + * functioning. */ + if (op_ret < 0 && op_errno != 0 && conf->sample_hard_errors) { + for (i = 0; i < IOS_HARD_ERROR_LIST_SIZE; i++) { + if (abs (op_errno) == ios_hard_error_list[i]) { + return _gf_true; + } + } + } + + /* If auditing is on, sample TRUNCATE, CREATE, UNLINK, RMDIR, MKDIR 1:1 */ + if (conf->audit_creates_and_unlinks) { + switch (fop_type) { + case GF_FOP_TRUNCATE: + case GF_FOP_CREATE: + case GF_FOP_UNLINK: + case GF_FOP_MKDIR: + case GF_FOP_RMDIR: + return _gf_true; + default: + break; + } + } + + /* Sample only 1 out of ios_sample_interval number of fops. 
*/ + return (ios_sample_buf->observed % conf->ios_sample_interval == 0); +} + void collect_ios_latency_sample (struct ios_conf *conf, glusterfs_fop_t fop_type, double elapsed, call_frame_t *frame, int32_t op_ret, int32_t op_errno) @@ -1901,9 +2099,9 @@ void collect_ios_latency_sample (struct ios_conf *conf, ios_sample_buf = conf->ios_sample_buf; LOCK (&conf->ios_sampling_lock); - if (conf->ios_sample_interval == 0 || - ios_sample_buf->observed % conf->ios_sample_interval != 0) + if (!_should_sample (conf, fop_type, ios_sample_buf, op_ret, op_errno)) { goto out; + } timestamp = &frame->begin; root = frame->root; @@ -4022,6 +4220,15 @@ reconfigure (xlator_t *this, dict_t *options) time, out); gf_log_set_log_flush_timeout (log_flush_timeout); + GF_OPTION_RECONF ("fop-sample-enable-audit", + conf->audit_creates_and_unlinks, options, bool, out); + + GF_OPTION_RECONF ("fop-sample-all-errors", + conf->sample_all_errors, options, bool, out); + + GF_OPTION_RECONF ("fop-sample-hard-errors", + conf->sample_hard_errors, options, bool, out); + ret = 0; out: gf_log (this ? this->name : "io-stats", @@ -4119,6 +4326,15 @@ init (xlator_t *this) GF_OPTION_INIT ("iam-gfproxy-daemon", conf->iamgfproxyd, bool, out); + GF_OPTION_INIT ("fop-sample-hard-errors", conf->sample_hard_errors, + bool, out); + + GF_OPTION_INIT ("fop-sample-all-errors", conf->sample_all_errors, + bool, out); + + GF_OPTION_INIT ("fop-sample-enable-audit", + conf->audit_creates_and_unlinks, bool, out); + GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out); GF_OPTION_INIT ("count-fop-hits", conf->count_fop_hits, bool, out); @@ -4559,6 +4775,23 @@ struct volume_options options[] = { "translator is running as part of an GFProxy daemon " "or not." }, + { .key = {"fop-sample-enable-audit"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option samples the following FOPs 1:1: " + "CREATE, UNLINK, MKDIR, RMDIR, TRUNCATE. 
" + }, + { .key = {"fop-sample-all-errors"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option samples all fops that failed." + }, + { .key = {"fop-sample-hard-errors"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "This option samples all fops with \"hard errors\"" + "including EROFS, ENOSPC, etc." + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index f6555955baf..9164c5680f3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1442,6 +1442,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "ios-dnscache-ttl-sec", .op_version = 1 }, + { .key = "diagnostics.fop-sample-enable-audit", + .voltype = "debug/io-stats", + .option = "fop-sample-enable-audit", + .value = "off", + .op_version = 2 + }, /* IO-cache xlator options */ { .key = "performance.cache-max-file-size", |