diff options
| author | Ashish Pandey <aspandey@redhat.com> | 2017-07-31 12:45:21 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2017-10-16 02:40:01 +0000 | 
| commit | d88be3bc29dbd1eaa393802f3c98e188fe5287c8 (patch) | |
| tree | 6f0b6648e1882761d755fc28ae7fd0c803305c7d | |
| parent | 067f38063e13fc75d4e3f7adf93441d15099c557 (diff) | |
cluster/ec: Improve heal info command to handle obvious cases
Problem:
1 - If a brick is down and we see an index entry in
.glusterfs/indices, we should show it in heal info
output as it most certainly needs heal.
2 - The first problem is also not handled after
ec_heal_inspect. Even if the lookup in ec_heal_inspect
marks need_heal as true, we don't handle it properly in
ec_get_heal_info and continue with the locked inspect, which
takes a lot of time.
Solution:
1 - In the first case we need not do any further investigation.
As soon as we see that a brick is down, we should report that
this index entry definitely needs heal.
2 - In the second case, if need_heal is _gf_true after
ec_heal_inspect, we should show the entry as requiring heal.
Change-Id: Ibe7f9d7602cc0b382ba53bddaf75a2a2c3326aa6
BUG: 1476668
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 54 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 9 | 
3 files changed, 41 insertions, 24 deletions
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 6fb981709c0..f5d62269acf 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -124,7 +124,7 @@ int32_t  ec_heal_inspect (call_frame_t *frame, ec_t *ec,                   inode_t *inode, unsigned char *locked_on,                   gf_boolean_t self_locked, gf_boolean_t thorough, -                 gf_boolean_t *need_heal); +                 ec_heal_need_t *need_heal);  int32_t  ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict); diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index bc25015498a..fd8c9024a48 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -2421,9 +2421,9 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)          intptr_t      bad            = 0;          ec_fop_data_t *fop           = data;          gf_boolean_t  blocking       = _gf_false; -        gf_boolean_t  need_heal      = _gf_false; +        ec_heal_need_t  need_heal    = EC_HEAL_NONEED;          unsigned char *up_subvols    = NULL; -	char up_bricks[32]; +        char up_bricks[32];          ec = this->private; @@ -2470,7 +2470,8 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)           * triggers heals periodically which need not be thorough*/          ec_heal_inspect (frame, ec, loc->inode, up_subvols, _gf_false,                           !ec->shd.iamshd, &need_heal); -        if (!need_heal) { + +        if (need_heal == EC_HEAL_NONEED)  {                  gf_msg (ec->xl->name, GF_LOG_DEBUG, 0,                          EC_MSG_HEAL_FAIL, "Heal is not required for : %s ",                          uuid_utoa(loc->gfid)); @@ -2776,18 +2777,18 @@ out:  static int32_t  _need_heal_calculate (ec_t *ec, uint64_t *dirty, unsigned char *sources,                        gf_boolean_t self_locked, int32_t lock_count, -                      gf_boolean_t 
*need_heal) +                      ec_heal_need_t *need_heal)  {          int i = 0;          int source_count = 0;          source_count = EC_COUNT (sources, ec->nodes);          if (source_count == ec->nodes) { -                *need_heal = _gf_false; +                *need_heal = EC_HEAL_NONEED;                  if (self_locked || lock_count == 0) {                          for (i = 0; i < ec->nodes; i++) {                                  if (dirty[i]) { -                                        *need_heal = _gf_true; +                                        *need_heal = EC_HEAL_MUST;                                          goto out;                                  }                          } @@ -2799,13 +2800,13 @@ _need_heal_calculate (ec_t *ec, uint64_t *dirty, unsigned char *sources,                                   * set and this indicates a problem in the                                   * inode.*/                                  if (dirty[i] > 1) { -                                        *need_heal = _gf_true; +                                        *need_heal = EC_HEAL_MUST;                                          goto out;                                  }                          }                  }          } else { -                *need_heal = _gf_true; +                *need_heal = EC_HEAL_MUST;          }  out: @@ -2815,7 +2816,7 @@ out:  static int32_t  ec_need_metadata_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,                         int32_t lock_count, gf_boolean_t self_locked, -                       gf_boolean_t thorough, gf_boolean_t *need_heal) +                       gf_boolean_t thorough, ec_heal_need_t *need_heal)  {          uint64_t           *dirty         = NULL;          unsigned char      *sources       = NULL; @@ -2836,10 +2837,10 @@ ec_need_metadata_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,          ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count,            
                          need_heal); -        if (ret == ec->nodes && !(*need_heal)) { +        if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {                  for (i = 1; i < ec->nodes; i++) {                          if (meta_versions[i] != meta_versions[0]) { -                                *need_heal = _gf_true; +                                *need_heal = EC_HEAL_MUST;                                  goto out;                          }                  } @@ -2851,7 +2852,7 @@ out:  static int32_t  ec_need_data_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,                     int32_t lock_count, gf_boolean_t self_locked, -                   gf_boolean_t thorough, gf_boolean_t *need_heal) +                   gf_boolean_t thorough, ec_heal_need_t *need_heal)  {          uint64_t           *dirty         = NULL;          unsigned char      *sources       = NULL; @@ -2888,7 +2889,7 @@ out:  static int32_t  ec_need_entry_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,                      int32_t lock_count, gf_boolean_t self_locked, -                    gf_boolean_t thorough, gf_boolean_t *need_heal) +                    gf_boolean_t thorough, ec_heal_need_t *need_heal)  {          uint64_t           *dirty         = NULL;          unsigned char      *sources       = NULL; @@ -2916,7 +2917,7 @@ out:  static int32_t  ec_need_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,                int32_t lock_count, gf_boolean_t self_locked, -              gf_boolean_t thorough, gf_boolean_t *need_heal) +              gf_boolean_t thorough, ec_heal_need_t *need_heal)  {          int                ret            = 0; @@ -2926,7 +2927,7 @@ ec_need_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,          if (ret < 0)                  goto out; -        if (*need_heal) +        if (*need_heal == EC_HEAL_MUST)                  goto out;          if (inode->ia_type == IA_IFREG) { @@ -2945,7 +2946,7 @@ int32_t  
ec_heal_inspect (call_frame_t *frame, ec_t *ec,                   inode_t *inode, unsigned char *locked_on,                   gf_boolean_t self_locked, gf_boolean_t thorough, -                 gf_boolean_t *need_heal) +                 ec_heal_need_t *need_heal)  {          loc_t              loc           = {0};          int                i             = 0; @@ -2989,7 +2990,7 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,          if (ret != ec->nodes) {                  ret = ec->nodes; -                *need_heal = _gf_true; +                *need_heal = EC_HEAL_MUST;                  goto out;          } @@ -3009,6 +3010,9 @@ need_heal:          ret = ec_need_heal (ec, inode, replies, lock_count,                              self_locked, thorough, need_heal); +        if (!self_locked && *need_heal == EC_HEAL_MUST) { +                *need_heal = EC_HEAL_MAYBE; +        }  out:          cluster_replies_wipe (replies, ec->nodes);          loc_wipe (&loc); @@ -3020,7 +3024,7 @@ out:  int32_t  ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode, -                        gf_boolean_t *need_heal) +                        ec_heal_need_t *need_heal)  {          unsigned char      *locked_on  = NULL;          unsigned char      *up_subvols = NULL; @@ -3038,7 +3042,7 @@ ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode,                                 replies, locked_on, frame, ec->xl,                                 ec->xl->name, inode, 0, 0);          if (ret != ec->nodes) { -                *need_heal = _gf_true; +                *need_heal = EC_HEAL_MUST;                  goto unlock;          }          ret = ec_heal_inspect (frame, ec, inode, locked_on, _gf_true, _gf_true, @@ -3055,7 +3059,7 @@ int32_t  ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)  {          int             ret             = -ENOMEM; -        gf_boolean_t    need_heal       = _gf_false; +        ec_heal_need_t  need_heal       = 
EC_HEAL_NONEED;          call_frame_t    *frame          = NULL;          ec_t            *ec             = NULL;          unsigned char   *up_subvols     = NULL; @@ -3068,6 +3072,10 @@ ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)          up_subvols = alloca0(ec->nodes);          ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); +        if (EC_COUNT (up_subvols, ec->nodes) != ec->nodes) { +                need_heal = EC_HEAL_MUST; +                goto set_heal; +        }          frame = create_frame (this, this->ctx->pool);          if (!frame) {                  goto out; @@ -3092,16 +3100,16 @@ ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)          ret = ec_heal_inspect (frame, ec, loc.inode, up_subvols, _gf_false,                                 _gf_false, &need_heal); -        if (ret == ec->nodes && !need_heal) { +        if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {                  goto set_heal;          } -        need_heal = _gf_false; +        need_heal = EC_HEAL_NONEED;          ret = ec_heal_locked_inspect (frame, ec, loc.inode,                                        &need_heal);          if (ret < 0)                  goto out;  set_heal: -        if (need_heal) { +        if (need_heal == EC_HEAL_MUST) {                  ret =  ec_set_heal_info (dict_rsp, "heal");          } else {                  ret =  ec_set_heal_info (dict_rsp, "no-heal"); diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index d771741f0ac..94ac911238e 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -17,6 +17,9 @@  #define EC_GF_MAX_REGS 16 +enum _ec_heal_need; +typedef enum _ec_heal_need ec_heal_need_t; +  enum _ec_read_policy;  typedef enum _ec_read_policy ec_read_policy_t; @@ -115,6 +118,12 @@ enum _ec_read_policy {          EC_READ_POLICY_MAX  }; +enum _ec_heal_need { +        EC_HEAL_NONEED, +        EC_HEAL_MAYBE, +        
EC_HEAL_MUST +}; +  struct _ec_config {      uint32_t version;      uint8_t  algorithm;  | 
