diff options
| author | Jeff Darcy <jdarcy@fb.com> | 2017-07-26 11:56:04 -0700 | 
|---|---|---|
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-07-31 21:23:43 +0000 | 
| commit | 18cb1a87bb7c88657b499b22894b81f1f694b6ad (patch) | |
| tree | 3a3b2732939533c9aaa2e8d2e266309021a79828 /xlators/debug | |
| parent | 91c9f4a19fde4894576b398252c77f730832a26a (diff) | |
error-gen: fix randomness
This generates errors in a truly random distribution, including streaks
and dry spells, instead of just setting a constant interval between
errors.  It also supports very small error probabilities, down to 1ppm
instead of 1%.  Also cleaned up some bad logic in the "random-failure"
case.
Change-Id: I17b058c4b86b0aa17dd5706250e6d44f242fcd9d
Signed-off-by: Jeff Darcy <jdarcy@fb.com>
Reviewed-on: https://review.gluster.org/17886
Smoke: Gluster Build System <jenkins@build.gluster.org>
Tested-by: Jeff Darcy <jeff@pl.atyp.us>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
Diffstat (limited to 'xlators/debug')
| -rw-r--r-- | xlators/debug/error-gen/src/error-gen.c | 88 | ||||
| -rw-r--r-- | xlators/debug/error-gen/src/error-gen.h | 6 | 
2 files changed, 67 insertions, 27 deletions
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c index 80f46dec065..0ce88a92918 100644 --- a/xlators/debug/error-gen/src/error-gen.c +++ b/xlators/debug/error-gen/src/error-gen.c @@ -12,6 +12,22 @@  #include "statedump.h"  #include "defaults.h" +/* + * The user can specify an error probability as a float percentage, but we + * store it internally as a numerator with this as the denominator.  When it's + * used, it's like this: + * + *    (rand() % FAILURE_GRANULARITY) < error_rate + * + * To minimize rounding errors from the modulo operation, it's good for this to + * be a power of two. + * + * (BTW this is just the normal case.  If "random-failure" is set, that does + * something completely different and this number is irrelevant.  See error_gen + * for the legacy code.) + */ +#define FAILURE_GRANULARITY     (1 << 20) +  sys_error_t error_no_list[] = {          [GF_FOP_LOOKUP]            = { .error_no_count = 4,                                      .error_no = {ENOENT,ENOTDIR, @@ -246,32 +262,52 @@ error_gen (xlator_t *this, int op_no)  {          eg_t             *egp = NULL;          int              count = 0; -        int              failure_iter_no = GF_FAILURE_DEFAULT;          int              error_no_int = 0;          int              rand_no = 0;          int              ret = 0; +        gf_boolean_t     should_err = _gf_false;          egp = this->private; -        LOCK (&egp->lock); -        { -                count = ++egp->op_count; -                failure_iter_no = egp->failure_iter_no; -                error_no_int = egp->error_no_int; -        } -        UNLOCK (&egp->lock); - -        if((count % failure_iter_no) == 0) { +        if (egp->random_failure) { +                /* +                 * I honestly don't know why anyone would use this "feature" +                 * but I'll try to preserve its functionality anyway.  Without +                 * locking twice to update failure_iter_no and egp->op_count +                 * separately, then not locking at all to update +                 * egp->failure_iter_no.  That's not needed for compatibility, +                 * and it's abhorrently wrong.  I have *some* standards. +                 */                  LOCK (&egp->lock);                  { -                        egp->op_count = 0; +                        count = ++(egp->op_count); +                        error_no_int = egp->error_no_int; +                        if ((count % egp->failure_iter_no) == 0) { +                                egp->op_count = 0; +                                /* coverty[DC.WEAK_CRYPTO] */ +                                egp->failure_iter_no = 3 +                                        + (rand () % GF_UNIVERSAL_ANSWER); +                                should_err = _gf_true; +                        }                  }                  UNLOCK (&egp->lock); +        } else { +                /* +                 * It turns out that rand() is almost universally implemented +                 * as a linear congruential PRNG, which is about as cheap as +                 * it gets.  This gets us real random behavior, including +                 * phenomena like streaks and dry spells, with controllable +                 * long-term probability, cheaply. +                 */ +                if ((rand () % FAILURE_GRANULARITY) < egp->failure_iter_no) { +                        should_err = _gf_true; +                } +        } +        if (should_err) {                  if (error_no_int)                          ret = error_no_int;                  else { -                          rand_no = generate_rand_no (op_no);                          if (op_no >= GF_FOP_MAXVALUE)                                  op_no = 0; @@ -279,10 +315,8 @@ error_gen (xlator_t *this, int op_no)                                  rand_no = 0;                          ret = error_no_list[op_no].error_no[rand_no];                  } -                if (egp->random_failure == _gf_true) -                        /* coverty[DC.WEAK_CRYPTO] */ -                        egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);          } +          return ret;  } @@ -1379,14 +1413,14 @@ error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,  }  static void -error_gen_set_failure (eg_t *pvt, int percent) +error_gen_set_failure (eg_t *pvt, double percent)  { +        double  ppm; +          GF_ASSERT (pvt); -        if (percent) -                pvt->failure_iter_no = 100/percent; -        else -                pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT; +        ppm = (percent / 100.0) * (double)FAILURE_GRANULARITY; +        pvt->failure_iter_no = (int)ppm;  }  static void @@ -1482,7 +1516,7 @@ reconfigure (xlator_t *this, dict_t *options)          eg_t            *pvt = NULL;          int32_t          ret = 0;          char            *error_enable_fops = NULL; -        int32_t          failure_percent_int = 0; +        double           failure_percent_dbl = 0.0;          if (!this || !this->private)                  goto out; @@ -1496,7 +1530,7 @@ reconfigure (xlator_t *this, dict_t *options)          if (pvt->error_no)                  pvt->error_no_int = conv_errno_to_int (&pvt->error_no); -        GF_OPTION_RECONF ("failure", failure_percent_int, options, int32, +        GF_OPTION_RECONF ("failure", failure_percent_dbl, options, percent,                            out);          GF_OPTION_RECONF ("enable", error_enable_fops, options, str, out); @@ -1505,7 +1539,7 @@ reconfigure (xlator_t *this, dict_t *options)                            bool, out);          error_gen_parse_fill_fops (pvt, error_enable_fops); -        error_gen_set_failure (pvt, failure_percent_int); +        error_gen_set_failure (pvt, failure_percent_dbl);          ret = 0;  out: @@ -1519,7 +1553,7 @@ init (xlator_t *this)          eg_t            *pvt = NULL;          int32_t          ret = 0;          char            *error_enable_fops = NULL; -        int32_t          failure_percent_int = 0; +        double          failure_percent_dbl = 0.0;          if (!this->children || this->children->next) {                  gf_log (this->name, GF_LOG_ERROR, @@ -1549,7 +1583,7 @@ init (xlator_t *this)          if (pvt->error_no)                  pvt->error_no_int = conv_errno_to_int (&pvt->error_no); -        GF_OPTION_INIT ("failure", failure_percent_int, int32, out); +        GF_OPTION_INIT ("failure", failure_percent_dbl, percent, out);          GF_OPTION_INIT ("enable", error_enable_fops, str, out); @@ -1557,7 +1591,7 @@ init (xlator_t *this)          error_gen_parse_fill_fops (pvt, error_enable_fops); -        error_gen_set_failure (pvt, failure_percent_int); +        error_gen_set_failure (pvt, failure_percent_dbl);          this->private = pvt; @@ -1641,7 +1675,7 @@ struct xlator_fops fops = {  struct volume_options options[] = {          { .key  = {"failure"}, -          .type = GF_OPTION_TYPE_INT, +          .type = GF_OPTION_TYPE_PERCENT,            .description = "Percentage failure of operations when enabled.",          }, diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h index 6ef6b232e6a..2cd95de335a 100644 --- a/xlators/debug/error-gen/src/error-gen.h +++ b/xlators/debug/error-gen/src/error-gen.h @@ -29,6 +29,12 @@ enum GF_PSEUDO_ERRORS {  typedef struct {          int enable[GF_FOP_MAXVALUE];          int op_count; +        /* +         * This is only an iteration number in the random-failure case.  For +         * the normal controlled-probability case, it's actually a numerator +         * for the failure probability (see FAILURE_GRANULARITY declaration). +         * It's just not worth blowing up the diff by changing it. +         */          int failure_iter_no;          char *error_no;          int error_no_int;  | 
