diff options
Diffstat (limited to 'xlators/cluster')
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 150 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 8 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 3 | 
3 files changed, 102 insertions, 59 deletions
| diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 7ee0666c6c9..9b9cde8cbc5 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -22,6 +22,9 @@  #include "ec-messages.h"  #define EC_INVALID_INDEX UINT32_MAX +#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\ +                                   EC_FLAG_WAITING_DATA_DIRTY |\ +                                   EC_FLAG_WAITING_METADATA_DIRTY)  uint32_t  ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop) @@ -888,11 +891,11 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)  }  gf_boolean_t -ec_config_check (ec_fop_data_t *fop, ec_config_t *config) +ec_config_check (xlator_t *xl, ec_config_t *config)  {      ec_t *ec; -    ec = fop->xl->private; +    ec = xl->private;      if ((config->version != EC_CONFIG_VERSION) ||          (config->algorithm != EC_CONFIG_ALGORITHM) ||          (config->gf_word_size != EC_GF_BITS) || @@ -917,11 +920,11 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config)              !ec_is_power_of_2(config->gf_word_size) ||              ((config->chunk_size * 8) % (config->gf_word_size * data_bricks)                                                                         != 0)) { -            gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, +            gf_msg (xl->name, GF_LOG_ERROR, EINVAL,                      EC_MSG_INVALID_CONFIG,                      "Invalid or corrupted config");          } else { -            gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, +            gf_msg (xl->name, GF_LOG_ERROR, EINVAL,                      EC_MSG_INVALID_CONFIG,                      "Unsupported config "                      "(V=%u, A=%u, W=%u, " @@ -968,24 +971,28 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,  {      struct list_head list;      ec_fop_data_t *fop = cookie, *parent, *tmp; -    ec_lock_link_t *link = fop->data; +    ec_lock_link_t *parent_link = fop->data; +    ec_lock_link_t *link = NULL;      ec_lock_t *lock = NULL;      ec_inode_t *ctx;      gf_boolean_t release = _gf_false; +    uint64_t waiting_flags = 0; +    uint64_t dirty[EC_VERSION_SIZE] = {0, 0}; -    lock = link->lock; -    parent = link->fop; +    lock = parent_link->lock; +    parent = parent_link->fop;      ctx = lock->ctx;      INIT_LIST_HEAD(&list); +    waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;      LOCK(&lock->loc.inode->lock);      list_for_each_entry(link, &lock->owners, owner_list) { -        if ((link->fop->flags & EC_FLAG_WAITING_XATTROP) != 0) { -            link->fop->flags ^= EC_FLAG_WAITING_XATTROP; - -            list_add_tail(&link->fop->cbk_list, &list); +        if ((link->waiting_flags & waiting_flags) != 0) { +            link->waiting_flags ^= (link->waiting_flags & waiting_flags); +            if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0) +                    list_add_tail(&link->fop->cbk_list, &list);          }      } @@ -997,8 +1004,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,          goto unlock;      } -    if (parent->flags & EC_FLAG_QUERY_METADATA) { -            parent->flags ^= EC_FLAG_QUERY_METADATA; +    if (waiting_flags & EC_FLAG_WAITING_XATTROP) {              op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,                                            ctx->pre_version,                                            EC_VERSION_SIZE); @@ -1042,7 +1048,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,                          goto unlock;                      }                  } else { -                    if (!ec_config_check(parent, &ctx->config)) { +                    if (!ec_config_check(parent->xl, &ctx->config)) {                          gf_msg (this->name, GF_LOG_ERROR, EINVAL,                                  EC_MSG_CONFIG_XATTR_INVALID,                                  "Invalid config xattr"); @@ -1057,12 +1063,22 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,              ctx->have_info = _gf_true;      } -    ec_set_dirty_flag (fop->data, ctx, ctx->dirty); +    ec_set_dirty_flag (fop->data, ctx, dirty); +    if (dirty[EC_METADATA_TXN] && +        (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) { +            GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]); +            ctx->dirty[EC_METADATA_TXN] = 1; +    } + +    if (dirty[EC_DATA_TXN] && +        (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) { +            GF_ASSERT (!ctx->dirty[EC_DATA_TXN]); +            ctx->dirty[EC_DATA_TXN] = 1; +    }      op_errno = 0;  unlock: -    lock->getting_xattr = _gf_false; -    UNLOCK(&lock->loc.inode->lock); +    lock->waiting_flags ^= waiting_flags;      if (op_errno == 0) {          /* If the fop fails on any of the good bricks, it is important to mark @@ -1072,33 +1088,24 @@ unlock:                  release = _gf_true;          } -        /* lock->release is a critical field that is checked and modified most -         * of the time inside a locked region. This use here is safe because we -         * are in a modifying fop and we currently don't allow two modifying -         * fops to be processed concurrently, so no one else could be checking -         * or modifying it.*/ -        if (link->update[0] && !link->dirty[0]) { +        if (parent_link->update[0] && !parent_link->dirty[0]) {                  lock->release |= release;          } -        if (link->update[1] && !link->dirty[1]) { +        if (parent_link->update[1] && !parent_link->dirty[1]) {                  lock->release |= release;          }          /* We don't allow the main fop to be executed on bricks that have not           * succeeded the initial xattrop. */ -        parent->mask &= fop->good;          ec_lock_update_good (lock, fop);          /*As of now only data healing marks bricks as healing*/          lock->healing |= fop->healing; -        if (ec_is_data_fop (parent->id)) { -            parent->healing |= fop->healing; -        } -    } else { -        ec_fop_set_error(parent, op_errno);      } +    UNLOCK(&lock->loc.inode->lock); +      while (!list_empty(&list)) {          tmp = list_entry(list.next, ec_fop_data_t, cbk_list);          list_del_init(&tmp->cbk_list); @@ -1110,16 +1117,50 @@ unlock:              if (ec_is_data_fop (tmp->id)) {                  tmp->healing |= fop->healing;              } -        } else { -            ec_fop_set_error(tmp, op_errno);          } -        ec_resume(tmp, 0); +        ec_resume(tmp, op_errno);      }      return 0;  } +static uint64_t +ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link, +                                 uint64_t *dirty) +{ +        uint64_t        oldflags = 0; +        uint64_t        newflags = 0; +        ec_inode_t *ctx     = lock->ctx; + +        oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS; + +        if (lock->query && !ctx->have_info) { +                lock->waiting_flags |= EC_FLAG_WAITING_XATTROP; +                link->waiting_flags |= EC_FLAG_WAITING_XATTROP; +        } + +        if (dirty[EC_DATA_TXN]) { +                if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) { +                        dirty[EC_DATA_TXN] = 0; +                } else { +                        lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY; +                } +                link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY; +        } + +        if (dirty[EC_METADATA_TXN]) { +                if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) { +                        dirty[EC_METADATA_TXN] = 0; +                } else { +                        lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY; +                } +                link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY; +        } +        newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS; +        return oldflags ^ newflags; +} +  void ec_get_size_version(ec_lock_link_t *link)  {      loc_t loc; @@ -1130,7 +1171,6 @@ void ec_get_size_version(ec_lock_link_t *link)      dict_t *xdata = NULL;      ec_t   *ec = NULL;      int32_t error = 0; -    gf_boolean_t getting_xattr;      gf_boolean_t set_dirty = _gf_false;      uint64_t allzero[EC_VERSION_SIZE] = {0, 0};      uint64_t dirty[EC_VERSION_SIZE] = {0, 0}; @@ -1138,6 +1178,7 @@ void ec_get_size_version(ec_lock_link_t *link)      ctx = lock->ctx;      fop = link->fop;      ec  = fop->xl->private; +    uint64_t changed_flags = 0;      if (ec->optimistic_changelog &&          !(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id)) @@ -1165,19 +1206,20 @@ void ec_get_size_version(ec_lock_link_t *link)      LOCK(&lock->loc.inode->lock); -    getting_xattr = lock->getting_xattr; -    lock->getting_xattr = _gf_true; -    if (getting_xattr) { -        fop->flags |= EC_FLAG_WAITING_XATTROP; - -        ec_sleep(fop); +    changed_flags = ec_set_xattrop_flags_and_params (lock, link, dirty); +    if (link->waiting_flags) { +            /* This fop needs to wait until all its flags are cleared which +             * potentially can be cleared by other xattrops that are already +             * wound*/ +            ec_sleep(fop); +    } else { +            GF_ASSERT (!changed_flags);      }      UNLOCK(&lock->loc.inode->lock); -    if (getting_xattr) { +    if (!changed_flags)          goto out; -    }      dict = dict_new();      if (dict == NULL) { @@ -1185,17 +1227,7 @@ void ec_get_size_version(ec_lock_link_t *link)          goto out;      } -    if (lock->loc.inode->ia_type == IA_IFREG || -        lock->loc.inode->ia_type == IA_INVAL) { -            xdata = dict_new(); -            if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) { -                error = -ENOMEM; -                goto out; -            } -    } - -    if (lock->query && !ctx->have_info) { -            fop->flags |= EC_FLAG_QUERY_METADATA; +    if (changed_flags & EC_FLAG_WAITING_XATTROP) {              /* Once we know that an xattrop will be needed,               * we try to get all available information in a               * single call. */ @@ -1214,9 +1246,17 @@ void ec_get_size_version(ec_lock_link_t *link)                  if (error != 0) {                      goto out;                  } + +                xdata = dict_new(); +                if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) { +                    error = -ENOMEM; +                    goto out; +                } +              }      } -    if (set_dirty) { + +    if (memcmp (allzero, dirty, sizeof (allzero))) {              error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty,                                        EC_VERSION_SIZE);              if (error != 0) { @@ -1949,7 +1989,7 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,              ctx->have_size = _gf_true;          }          if ((ec_dict_del_config(xdata, EC_XATTR_CONFIG, &ctx->config) == 0) && -            ec_config_check(fop->parent, &ctx->config)) { +            ec_config_check(fop->xl, &ctx->config)) {              ctx->have_config = _gf_true;          } diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 0e8be35dbf9..6fb981709c0 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -27,9 +27,11 @@ typedef enum {  #define EC_CONFIG_ALGORITHM 0 -#define EC_FLAG_LOCK_SHARED       0x0001 -#define EC_FLAG_WAITING_XATTROP   0x0002 -#define EC_FLAG_QUERY_METADATA    0x0004 +#define EC_FLAG_LOCK_SHARED             0x0001 + +#define EC_FLAG_WAITING_XATTROP         0x0001 +#define EC_FLAG_WAITING_DATA_DIRTY      0x0002 +#define EC_FLAG_WAITING_METADATA_DIRTY  0x0004  #define EC_SELFHEAL_BIT 62 diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 907a3084952..d771741f0ac 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -227,8 +227,8 @@ struct _ec_lock {      uintptr_t          healing;      uint32_t           refs_owners;  /* Refs for fops owning the lock */      uint32_t           refs_pending; /* Refs assigned to fops being prepared */ +    uint32_t           waiting_flags; /*Track xattrop/dirty marking*/      gf_boolean_t       acquired; -    gf_boolean_t       getting_xattr;      gf_boolean_t       unlock_now;      gf_boolean_t       release;      gf_boolean_t       query; @@ -250,6 +250,7 @@ struct _ec_lock_link {      gf_boolean_t      optimistic_changelog;      loc_t            *base;      uint64_t          size; +    uint32_t          waiting_flags;  };  struct _ec_fop_data { | 
