From e16868dede6455cab644805af6fe1ac312775e13 Mon Sep 17 00:00:00 2001 From: Gluster Ant Date: Wed, 12 Sep 2018 17:52:45 +0530 Subject: Land part 2 of clang-format changes Change-Id: Ia84cc24c8924e6d22d02ac15f611c10e26db99b4 Signed-off-by: Nigel Babu --- xlators/cluster/afr/src/afr-common.c | 10695 ++++++----- xlators/cluster/afr/src/afr-dir-read.c | 493 +- xlators/cluster/afr/src/afr-dir-write.c | 2270 ++- xlators/cluster/afr/src/afr-inode-read.c | 2873 ++- xlators/cluster/afr/src/afr-inode-write.c | 3534 ++-- xlators/cluster/afr/src/afr-lk-common.c | 1683 +- xlators/cluster/afr/src/afr-open.c | 526 +- xlators/cluster/afr/src/afr-read-txn.c | 752 +- xlators/cluster/afr/src/afr-self-heal-common.c | 4108 ++--- xlators/cluster/afr/src/afr-self-heal-data.c | 1469 +- xlators/cluster/afr/src/afr-self-heal-entry.c | 1902 +- xlators/cluster/afr/src/afr-self-heal-metadata.c | 820 +- xlators/cluster/afr/src/afr-self-heal-name.c | 1091 +- xlators/cluster/afr/src/afr-self-heald.c | 2151 ++- xlators/cluster/afr/src/afr-transaction.c | 3887 ++-- xlators/cluster/afr/src/afr.c | 2045 +- xlators/cluster/dht/src/dht-common.c | 18427 +++++++++---------- xlators/cluster/dht/src/dht-diskusage.c | 853 +- xlators/cluster/dht/src/dht-hashfn.c | 142 +- xlators/cluster/dht/src/dht-helper.c | 3382 ++-- xlators/cluster/dht/src/dht-inode-read.c | 2453 ++- xlators/cluster/dht/src/dht-inode-write.c | 2316 ++- xlators/cluster/dht/src/dht-layout.c | 1275 +- xlators/cluster/dht/src/dht-linkfile.c | 515 +- xlators/cluster/dht/src/dht-lock.c | 2141 +-- xlators/cluster/dht/src/dht-rebalance.c | 9142 +++++---- xlators/cluster/dht/src/dht-rename.c | 3322 ++-- xlators/cluster/dht/src/dht-selfheal.c | 4568 +++-- xlators/cluster/dht/src/dht-shared.c | 1999 +- xlators/cluster/dht/src/dht.c | 118 +- xlators/cluster/dht/src/nufa.c | 1091 +- xlators/cluster/dht/src/switch.c | 1477 +- xlators/cluster/dht/src/tier-common.c | 1988 +- xlators/cluster/dht/src/tier.c | 5232 +++--- xlators/cluster/dht/src/unittest/dht_layout_mock.c | 33 +- .../cluster/dht/src/unittest/dht_layout_unittest.c | 22 +- xlators/cluster/ec/src/ec-code-avx.c | 43 +- xlators/cluster/ec/src/ec-code-c.c | 902 +- xlators/cluster/ec/src/ec-code-intel.c | 64 +- xlators/cluster/ec/src/ec-code-sse.c | 43 +- xlators/cluster/ec/src/ec-code-x64.c | 47 +- xlators/cluster/ec/src/ec-code.c | 391 +- xlators/cluster/ec/src/ec-combine.c | 495 +- xlators/cluster/ec/src/ec-common.c | 1785 +- xlators/cluster/ec/src/ec-data.c | 186 +- xlators/cluster/ec/src/ec-dir-read.c | 339 +- xlators/cluster/ec/src/ec-dir-write.c | 607 +- xlators/cluster/ec/src/ec-generic.c | 805 +- xlators/cluster/ec/src/ec-gf8.c | 10679 ++++++----- xlators/cluster/ec/src/ec-heal.c | 4917 +++-- xlators/cluster/ec/src/ec-heald.c | 814 +- xlators/cluster/ec/src/ec-helpers.c | 333 +- xlators/cluster/ec/src/ec-inode-read.c | 1068 +- xlators/cluster/ec/src/ec-inode-write.c | 1740 +- xlators/cluster/ec/src/ec-locks.c | 612 +- xlators/cluster/ec/src/ec-method.c | 40 +- xlators/cluster/ec/src/ec.c | 1880 +- xlators/cluster/stripe/src/stripe-helpers.c | 1043 +- xlators/cluster/stripe/src/stripe.c | 9394 +++++----- 59 files changed, 67926 insertions(+), 71096 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 3a32ebc31a4..eb0e7330a91 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -42,157 +42,155 @@ #include "afr-messages.h" int32_t -afr_quorum_errno (afr_private_t *priv) 
+afr_quorum_errno(afr_private_t *priv) { - return ENOTCONN; + return ENOTCONN; } int -afr_fav_child_reset_sink_xattrs (void *opaque); +afr_fav_child_reset_sink_xattrs(void *opaque); int -afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame, - void *opaque); +afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque); static void -afr_discover_done (call_frame_t *frame, xlator_t *this); +afr_discover_done(call_frame_t *frame, xlator_t *this); gf_boolean_t -afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, - int32_t *op_errno) -{ - if (priv->consistent_io && local->call_count != priv->child_count) { - gf_msg (THIS->name, GF_LOG_INFO, 0, - AFR_MSG_SUBVOLS_DOWN, "All subvolumes are not up"); - if (op_errno) - *op_errno = ENOTCONN; - return _gf_false; - } - return _gf_true; +afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv, + int32_t *op_errno) +{ + if (priv->consistent_io && local->call_count != priv->child_count) { + gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN, + "All subvolumes are not up"); + if (op_errno) + *op_errno = ENOTCONN; + return _gf_false; + } + return _gf_true; } call_frame_t * -afr_copy_frame (call_frame_t *base) +afr_copy_frame(call_frame_t *base) { - afr_local_t *local = NULL; - call_frame_t *frame = NULL; - int op_errno = 0; + afr_local_t *local = NULL; + call_frame_t *frame = NULL; + int op_errno = 0; - frame = copy_frame (base); - if (!frame) - return NULL; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) { - AFR_STACK_DESTROY (frame); - return NULL; - } + frame = copy_frame(base); + if (!frame) + return NULL; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) { + AFR_STACK_DESTROY(frame); + return NULL; + } - return frame; + return frame; } /* Check if an entry or inode could be undergoing a transaction. */ gf_boolean_t -afr_is_possibly_under_txn (afr_transaction_type type, afr_local_t *local, - xlator_t *this) -{ - int i = 0; - int tmp = 0; - afr_private_t *priv = NULL; - GF_UNUSED char *key = NULL; - - priv = this->private; - - if (type == AFR_ENTRY_TRANSACTION) - key = GLUSTERFS_PARENT_ENTRYLK; - else if (type == AFR_DATA_TRANSACTION) - /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once - * pl_inodelk_xattr_fill supports separate keys for different - * domains.*/ - key = GLUSTERFS_INODELK_COUNT; - - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].xdata) - continue; - if (dict_get_int32 (local->replies[i].xdata, key, &tmp) == 0) - if (tmp) - return _gf_true; - } +afr_is_possibly_under_txn(afr_transaction_type type, afr_local_t *local, + xlator_t *this) +{ + int i = 0; + int tmp = 0; + afr_private_t *priv = NULL; + GF_UNUSED char *key = NULL; + + priv = this->private; + + if (type == AFR_ENTRY_TRANSACTION) + key = GLUSTERFS_PARENT_ENTRYLK; + else if (type == AFR_DATA_TRANSACTION) + /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. 
once + * pl_inodelk_xattr_fill supports separate keys for different + * domains.*/ + key = GLUSTERFS_INODELK_COUNT; + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].xdata) + continue; + if (dict_get_int32(local->replies[i].xdata, key, &tmp) == 0) + if (tmp) + return _gf_true; + } - return _gf_false; + return _gf_false; } static void -afr_inode_ctx_destroy (afr_inode_ctx_t *ctx) +afr_inode_ctx_destroy(afr_inode_ctx_t *ctx) { - int i = 0; + int i = 0; - if (!ctx) - return; + if (!ctx) + return; - for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) { - GF_FREE (ctx->pre_op_done[i]); - } + for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) { + GF_FREE(ctx->pre_op_done[i]); + } - GF_FREE (ctx); + GF_FREE(ctx); } int -__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx) -{ - uint64_t ctx_int = 0; - int ret = -1; - int i = -1; - int num_locks = -1; - afr_inode_ctx_t *ictx = NULL; - afr_lock_t *lock = NULL; - afr_private_t *priv = this->private; - - ret = __inode_ctx_get (inode, this, &ctx_int); - if (ret == 0) { - *ctx = (afr_inode_ctx_t *)ctx_int; - return 0; - } - - ictx = GF_CALLOC (1, sizeof (afr_inode_ctx_t), gf_afr_mt_inode_ctx_t); - if (!ictx) - goto out; - - for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) { - ictx->pre_op_done[i] = GF_CALLOC (sizeof *ictx->pre_op_done[i], - priv->child_count, - gf_afr_mt_int32_t); - if (!ictx->pre_op_done[i]) { - ret = -ENOMEM; - goto out; - } - } - - num_locks = sizeof(ictx->lock)/sizeof(afr_lock_t); - for (i = 0; i < num_locks; i++) { - lock = &ictx->lock[i]; - INIT_LIST_HEAD (&lock->post_op); - INIT_LIST_HEAD (&lock->frozen); - INIT_LIST_HEAD (&lock->waiting); - INIT_LIST_HEAD (&lock->owners); - } - - ctx_int = (uint64_t)ictx; - ret = __inode_ctx_set (inode, this, &ctx_int); - if (ret) { - goto out; - } - - ictx->spb_choice = -1; - ictx->read_subvol = 0; - ictx->write_subvol = 0; - ictx->lock_count = 0; - ret = 0; - *ctx = ictx; +__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx) +{ + uint64_t ctx_int = 0; + int ret = -1; + int i = -1; + int num_locks = -1; + afr_inode_ctx_t *ictx = NULL; + afr_lock_t *lock = NULL; + afr_private_t *priv = this->private; + + ret = __inode_ctx_get(inode, this, &ctx_int); + if (ret == 0) { + *ctx = (afr_inode_ctx_t *)ctx_int; + return 0; + } + + ictx = GF_CALLOC(1, sizeof(afr_inode_ctx_t), gf_afr_mt_inode_ctx_t); + if (!ictx) + goto out; + + for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) { + ictx->pre_op_done[i] = GF_CALLOC(sizeof *ictx->pre_op_done[i], + priv->child_count, gf_afr_mt_int32_t); + if (!ictx->pre_op_done[i]) { + ret = -ENOMEM; + goto out; + } + } + + num_locks = sizeof(ictx->lock) / sizeof(afr_lock_t); + for (i = 0; i < num_locks; i++) { + lock = &ictx->lock[i]; + INIT_LIST_HEAD(&lock->post_op); + INIT_LIST_HEAD(&lock->frozen); + INIT_LIST_HEAD(&lock->waiting); + INIT_LIST_HEAD(&lock->owners); + } + + ctx_int = (uint64_t)ictx; + ret = __inode_ctx_set(inode, this, &ctx_int); + if (ret) { + goto out; + } + + ictx->spb_choice = -1; + ictx->read_subvol = 0; + ictx->write_subvol = 0; + ictx->lock_count = 0; + ret = 0; + *ctx = ictx; out: - if (ret) { - afr_inode_ctx_destroy (ictx); - } - return ret; + if (ret) { + afr_inode_ctx_destroy(ictx); + } + return ret; } /* @@ -226,1723 +224,1670 @@ out: */ int -__afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, - inode_t *inode) -{ - int i = 0; - int txn_type = 0; - int count = 0; - int index = -1; - uint16_t datamap_old = 0; - uint16_t metadatamap_old = 0; - uint16_t datamap = 0; - uint16_t metadatamap = 0; - 
uint16_t tmp_map = 0; - uint16_t mask = 0; - uint32_t event = 0; - uint64_t val = 0; - afr_private_t *priv = NULL; - - priv = this->private; - txn_type = local->transaction.type; - - if (txn_type == AFR_DATA_TRANSACTION) - val = local->inode_ctx->write_subvol; - else - val = local->inode_ctx->read_subvol; - - metadatamap_old = metadatamap = (val & 0x000000000000ffff); - datamap_old = datamap = (val & 0x00000000ffff0000) >> 16; - event = (val & 0xffffffff00000000) >> 32; - - if (txn_type == AFR_DATA_TRANSACTION) - tmp_map = datamap; - else if (txn_type == AFR_METADATA_TRANSACTION) - tmp_map = metadatamap; - - count = gf_bits_count (tmp_map); - - if (count == 1) - index = gf_bits_index (tmp_map); - - for (i = 0; i < priv->child_count; i++) { - mask = 0; - if (!local->transaction.failed_subvols[i]) - continue; - - mask = 1 << i; - if (txn_type == AFR_METADATA_TRANSACTION) - metadatamap &= ~mask; - else if (txn_type == AFR_DATA_TRANSACTION) - datamap &= ~mask; - } - - switch (txn_type) { +__afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, + inode_t *inode) +{ + int i = 0; + int txn_type = 0; + int count = 0; + int index = -1; + uint16_t datamap_old = 0; + uint16_t metadatamap_old = 0; + uint16_t datamap = 0; + uint16_t metadatamap = 0; + uint16_t tmp_map = 0; + uint16_t mask = 0; + uint32_t event = 0; + uint64_t val = 0; + afr_private_t *priv = NULL; + + priv = this->private; + txn_type = local->transaction.type; + + if (txn_type == AFR_DATA_TRANSACTION) + val = local->inode_ctx->write_subvol; + else + val = local->inode_ctx->read_subvol; + + metadatamap_old = metadatamap = (val & 0x000000000000ffff); + datamap_old = datamap = (val & 0x00000000ffff0000) >> 16; + event = (val & 0xffffffff00000000) >> 32; + + if (txn_type == AFR_DATA_TRANSACTION) + tmp_map = datamap; + else if (txn_type == AFR_METADATA_TRANSACTION) + tmp_map = metadatamap; + + count = gf_bits_count(tmp_map); + + if (count == 1) + index = gf_bits_index(tmp_map); + + for (i = 0; i < priv->child_count; i++) { + mask = 0; + if (!local->transaction.failed_subvols[i]) + continue; + + mask = 1 << i; + if (txn_type == AFR_METADATA_TRANSACTION) + metadatamap &= ~mask; + else if (txn_type == AFR_DATA_TRANSACTION) + datamap &= ~mask; + } + + switch (txn_type) { case AFR_METADATA_TRANSACTION: - if ((metadatamap_old != 0) && (metadatamap == 0) && - (count == 1)) { - local->transaction.in_flight_sb_errno = - local->replies[index].op_errno; - local->transaction.in_flight_sb = _gf_true; - metadatamap |= (1 << index); - } - if (metadatamap_old != metadatamap) { - event = 0; - } - break; + if ((metadatamap_old != 0) && (metadatamap == 0) && (count == 1)) { + local->transaction.in_flight_sb_errno = local->replies[index] + .op_errno; + local->transaction.in_flight_sb = _gf_true; + metadatamap |= (1 << index); + } + if (metadatamap_old != metadatamap) { + event = 0; + } + break; case AFR_DATA_TRANSACTION: - if ((datamap_old != 0) && (datamap == 0) && (count == 1)) { - local->transaction.in_flight_sb_errno = - local->replies[index].op_errno; - local->transaction.in_flight_sb = _gf_true; - datamap |= (1 << index); - } - if (datamap_old != datamap) - event = 0; - break; + if ((datamap_old != 0) && (datamap == 0) && (count == 1)) { + local->transaction.in_flight_sb_errno = local->replies[index] + .op_errno; + local->transaction.in_flight_sb = _gf_true; + datamap |= (1 << index); + } + if (datamap_old != datamap) + event = 0; + break; default: - break; - } + break; + } - val = ((uint64_t) metadatamap) | - (((uint64_t) datamap) << 16) | - 
(((uint64_t) event) << 32); + val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | + (((uint64_t)event) << 32); - if (txn_type == AFR_DATA_TRANSACTION) - local->inode_ctx->write_subvol = val; - local->inode_ctx->read_subvol = val; + if (txn_type == AFR_DATA_TRANSACTION) + local->inode_ctx->write_subvol = val; + local->inode_ctx->read_subvol = val; - return 0; + return 0; } gf_boolean_t -afr_is_symmetric_error (call_frame_t *frame, xlator_t *this) +afr_is_symmetric_error(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int op_errno = 0; - int i_errno = 0; - gf_boolean_t matching_errors = _gf_true; - int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int op_errno = 0; + int i_errno = 0; + gf_boolean_t matching_errors = _gf_true; + int i = 0; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret != -1) { - /* Operation succeeded on at least one subvol, - so it is not a failed-everywhere situation. - */ - matching_errors = _gf_false; - break; - } - i_errno = local->replies[i].op_errno; - - if (i_errno == ENOTCONN) { - /* ENOTCONN is not a symmetric error. We do not - know if the operation was performed on the - backend or not. - */ - matching_errors = _gf_false; - break; - } + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret != -1) { + /* Operation succeeded on at least one subvol, + so it is not a failed-everywhere situation. + */ + matching_errors = _gf_false; + break; + } + i_errno = local->replies[i].op_errno; - if (!op_errno) { - op_errno = i_errno; - } else if (op_errno != i_errno) { - /* Mismatching op_errno's */ - matching_errors = _gf_false; - break; - } + if (i_errno == ENOTCONN) { + /* ENOTCONN is not a symmetric error. We do not + know if the operation was performed on the + backend or not. + */ + matching_errors = _gf_false; + break; + } + + if (!op_errno) { + op_errno = i_errno; + } else if (op_errno != i_errno) { + /* Mismatching op_errno's */ + matching_errors = _gf_false; + break; } + } - return matching_errors; + return matching_errors; } int -afr_set_in_flight_sb_status (xlator_t *this, call_frame_t *frame, - inode_t *inode) +afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame, inode_t *inode) { - int ret = -1; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; + int ret = -1; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - /* If this transaction saw no failures, then exit. */ - if (AFR_COUNT (local->transaction.failed_subvols, - priv->child_count) == 0) - return 0; + /* If this transaction saw no failures, then exit. 
*/ + if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) == 0) + return 0; - if (afr_is_symmetric_error (frame, this)) - return 0; + if (afr_is_symmetric_error(frame, this)) + return 0; - LOCK (&inode->lock); - { - ret = __afr_set_in_flight_sb_status (this, local, inode); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __afr_set_in_flight_sb_status(this, local, inode); + } + UNLOCK(&inode->lock); - return ret; + return ret; } int -__afr_inode_read_subvol_get_small (inode_t *inode, xlator_t *this, - unsigned char *data, unsigned char *metadata, - int *event_p) -{ - afr_private_t *priv = NULL; - int ret = -1; - uint16_t datamap = 0; - uint16_t metadatamap = 0; - uint32_t event = 0; - uint64_t val = 0; - int i = 0; - afr_inode_ctx_t *ctx = NULL; - - priv = this->private; - - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret < 0) - return ret; +__afr_inode_read_subvol_get_small(inode_t *inode, xlator_t *this, + unsigned char *data, unsigned char *metadata, + int *event_p) +{ + afr_private_t *priv = NULL; + int ret = -1; + uint16_t datamap = 0; + uint16_t metadatamap = 0; + uint32_t event = 0; + uint64_t val = 0; + int i = 0; + afr_inode_ctx_t *ctx = NULL; + + priv = this->private; + + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret < 0) + return ret; - val = ctx->read_subvol; + val = ctx->read_subvol; - metadatamap = (val & 0x000000000000ffff); - datamap = (val & 0x00000000ffff0000) >> 16; - event = (val & 0xffffffff00000000) >> 32; + metadatamap = (val & 0x000000000000ffff); + datamap = (val & 0x00000000ffff0000) >> 16; + event = (val & 0xffffffff00000000) >> 32; - for (i = 0; i < priv->child_count; i++) { - if (metadata) - metadata[i] = (metadatamap >> i) & 1; - if (data) - data[i] = (datamap >> i) & 1; - } + for (i = 0; i < priv->child_count; i++) { + if (metadata) + metadata[i] = (metadatamap >> i) & 1; + if (data) + data[i] = (datamap >> i) & 1; + } - if (event_p) - *event_p = event; - return ret; + if (event_p) + *event_p = event; + return ret; } - int -__afr_inode_read_subvol_set_small (inode_t *inode, xlator_t *this, - unsigned char *data, unsigned char *metadata, - int event) +__afr_inode_read_subvol_set_small(inode_t *inode, xlator_t *this, + unsigned char *data, unsigned char *metadata, + int event) { - afr_private_t *priv = NULL; - uint16_t datamap = 0; - uint16_t metadatamap = 0; - uint64_t val = 0; - int i = 0; - int ret = -1; - afr_inode_ctx_t *ctx = NULL; + afr_private_t *priv = NULL; + uint16_t datamap = 0; + uint16_t metadatamap = 0; + uint64_t val = 0; + int i = 0; + int ret = -1; + afr_inode_ctx_t *ctx = NULL; - priv = this->private; + priv = this->private; - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret) - goto out; + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret) + goto out; - for (i = 0; i < priv->child_count; i++) { - if (data[i]) - datamap |= (1 << i); - if (metadata[i]) - metadatamap |= (1 << i); - } + for (i = 0; i < priv->child_count; i++) { + if (data[i]) + datamap |= (1 << i); + if (metadata[i]) + metadatamap |= (1 << i); + } - val = ((uint64_t) metadatamap) | - (((uint64_t) datamap) << 16) | - (((uint64_t) event) << 32); + val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | + (((uint64_t)event) << 32); - ctx->read_subvol = val; + ctx->read_subvol = val; - ret = 0; + ret = 0; out: - return ret; + return ret; } int -__afr_inode_event_gen_reset_small (inode_t *inode, xlator_t *this) +__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this) { - int ret = -1; - uint16_t datamap = 0; 
- uint16_t metadatamap = 0; - uint32_t event = 0; - uint64_t val = 0; - afr_inode_ctx_t *ctx = NULL; + int ret = -1; + uint16_t datamap = 0; + uint16_t metadatamap = 0; + uint32_t event = 0; + uint64_t val = 0; + afr_inode_ctx_t *ctx = NULL; - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret) - return ret; + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret) + return ret; - val = ctx->read_subvol; + val = ctx->read_subvol; - metadatamap = (val & 0x000000000000ffff) >> 0; - datamap = (val & 0x00000000ffff0000) >> 16; - event = 0; + metadatamap = (val & 0x000000000000ffff) >> 0; + datamap = (val & 0x00000000ffff0000) >> 16; + event = 0; - val = ((uint64_t) metadatamap) | - (((uint64_t) datamap) << 16) | - (((uint64_t) event) << 32); + val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | + (((uint64_t)event) << 32); - ctx->read_subvol = val; + ctx->read_subvol = val; - return ret; + return ret; } - int -__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this, - unsigned char *data, unsigned char *metadata, - int *event_p) +__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) { - afr_private_t *priv = NULL; - int ret = -1; + afr_private_t *priv = NULL; + int ret = -1; - priv = this->private; + priv = this->private; - if (priv->child_count <= 16) - ret = __afr_inode_read_subvol_get_small (inode, this, data, - metadata, event_p); - else - /* TBD: allocate structure with array and read from it */ - ret = -1; + if (priv->child_count <= 16) + ret = __afr_inode_read_subvol_get_small(inode, this, data, metadata, + event_p); + else + /* TBD: allocate structure with array and read from it */ + ret = -1; - return ret; + return ret; } int -__afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this, - int *spb_choice) +__afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, + int *spb_choice) { - afr_inode_ctx_t *ctx = NULL; - int ret = -1; + afr_inode_ctx_t *ctx = NULL; + int ret = -1; - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret < 0) - return ret; + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret < 0) + return ret; - *spb_choice = ctx->spb_choice; - return 0; + *spb_choice = ctx->spb_choice; + return 0; } int -__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data, - unsigned char *metadata, int event) +__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int event) { - afr_private_t *priv = NULL; - int ret = -1; + afr_private_t *priv = NULL; + int ret = -1; - priv = this->private; + priv = this->private; - if (priv->child_count <= 16) - ret = __afr_inode_read_subvol_set_small (inode, this, data, - metadata, event); - else - ret = -1; + if (priv->child_count <= 16) + ret = __afr_inode_read_subvol_set_small(inode, this, data, metadata, + event); + else + ret = -1; - return ret; + return ret; } int -__afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this, - int spb_choice) +__afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, + int spb_choice) { - afr_inode_ctx_t *ctx = NULL; - int ret = -1; + afr_inode_ctx_t *ctx = NULL; + int ret = -1; - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret) - goto out; + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret) + goto out; - ctx->spb_choice = spb_choice; + ctx->spb_choice = spb_choice; - ret = 0; + ret = 0; out: - return ret; + return ret; } int -__afr_inode_event_gen_reset (inode_t *inode, xlator_t *this) 
+__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) { - afr_private_t *priv = NULL; - int ret = -1; + afr_private_t *priv = NULL; + int ret = -1; - priv = this->private; + priv = this->private; - if (priv->child_count <= 16) - ret = __afr_inode_event_gen_reset_small (inode, this); - else - ret = -1; + if (priv->child_count <= 16) + ret = __afr_inode_event_gen_reset_small(inode, this); + else + ret = -1; - return ret; + return ret; } - int -afr_inode_read_subvol_get (inode_t *inode, xlator_t *this, unsigned char *data, - unsigned char *metadata, int *event_p) +afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK(&inode->lock); - { - ret = __afr_inode_read_subvol_get (inode, this, data, - metadata, event_p); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __afr_inode_read_subvol_get(inode, this, data, metadata, event_p); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } int -afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this, +afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this, unsigned char *readable, int *event_p, int type) { - - afr_private_t *priv = this->private; - afr_local_t *local = frame->local; - unsigned char *data = alloca0 (priv->child_count); - unsigned char *metadata = alloca0 (priv->child_count); - int data_count = 0; - int metadata_count = 0; - int event_generation = 0; - int ret = 0; - - ret = afr_inode_read_subvol_get (inode, this, data, metadata, - &event_generation); - if (ret == -1) + afr_private_t *priv = this->private; + afr_local_t *local = frame->local; + unsigned char *data = alloca0(priv->child_count); + unsigned char *metadata = alloca0(priv->child_count); + int data_count = 0; + int metadata_count = 0; + int event_generation = 0; + int ret = 0; + + ret = afr_inode_read_subvol_get(inode, this, data, metadata, + &event_generation); + if (ret == -1) + return -EIO; + + data_count = AFR_COUNT(data, priv->child_count); + metadata_count = AFR_COUNT(metadata, priv->child_count); + + if (inode->ia_type == IA_IFDIR) { + /* For directories, allow even if it is in data split-brain. */ + if (type == AFR_METADATA_TRANSACTION || local->op == GF_FOP_STAT || + local->op == GF_FOP_FSTAT) { + if (!metadata_count) return -EIO; - - data_count = AFR_COUNT (data, priv->child_count); - metadata_count = AFR_COUNT (metadata, priv->child_count); - - if (inode->ia_type == IA_IFDIR) { - /* For directories, allow even if it is in data split-brain. */ - if (type == AFR_METADATA_TRANSACTION || - local->op == GF_FOP_STAT || local->op == GF_FOP_FSTAT) { - if (!metadata_count) - return -EIO; - } - } else { - /* For files, abort in case of data/metadata split-brain. */ - if (!data_count || !metadata_count) { - return -EIO; - } } - - if (type == AFR_METADATA_TRANSACTION && readable) - memcpy (readable, metadata, priv->child_count * sizeof *metadata); - if (type == AFR_DATA_TRANSACTION && readable) { - if (!data_count) - memcpy (readable, local->child_up, - priv->child_count * sizeof *readable); - else - memcpy (readable, data, priv->child_count * sizeof *data); + } else { + /* For files, abort in case of data/metadata split-brain. 
*/ + if (!data_count || !metadata_count) { + return -EIO; } - if (event_p) - *event_p = event_generation; - return 0; + } + + if (type == AFR_METADATA_TRANSACTION && readable) + memcpy(readable, metadata, priv->child_count * sizeof *metadata); + if (type == AFR_DATA_TRANSACTION && readable) { + if (!data_count) + memcpy(readable, local->child_up, + priv->child_count * sizeof *readable); + else + memcpy(readable, data, priv->child_count * sizeof *data); + } + if (event_p) + *event_p = event_generation; + return 0; } int -afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this, - int *spb_choice) +afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, + int *spb_choice) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK(&inode->lock); - { - ret = __afr_inode_split_brain_choice_get (inode, this, - spb_choice); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __afr_inode_split_brain_choice_get(inode, this, spb_choice); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } - int -afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data, - unsigned char *metadata, int event) +afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int event) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK(&inode->lock); - { - ret = __afr_inode_read_subvol_set (inode, this, data, metadata, - event); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __afr_inode_read_subvol_set(inode, this, data, metadata, event); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } - int -afr_inode_split_brain_choice_set (inode_t *inode, xlator_t *this, - int spb_choice) +afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK(&inode->lock); - { - ret = __afr_inode_split_brain_choice_set (inode, this, - spb_choice); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __afr_inode_split_brain_choice_set(inode, this, spb_choice); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } - /* The caller of this should perform afr_inode_refresh, if this function * returns _gf_true */ gf_boolean_t -afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this, - int event_gen1, int event_gen2) +afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1, + int event_gen2) { - gf_boolean_t need_refresh = _gf_false; - afr_inode_ctx_t *ctx = NULL; - int ret = -1; + gf_boolean_t need_refresh = _gf_false; + afr_inode_ctx_t *ctx = NULL; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK(&inode->lock); - { - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret) - goto unlock; - - need_refresh = ctx->need_refresh; - /* Hoping that the caller will do inode_refresh followed by - * this, hence setting the need_refresh to false */ - ctx->need_refresh = _gf_false; - } + LOCK(&inode->lock); + { + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret) + goto unlock; + + need_refresh = ctx->need_refresh; + /* Hoping that the caller will do inode_refresh followed by + * this, hence setting the need_refresh to false */ + ctx->need_refresh = _gf_false; + } unlock: - UNLOCK(&inode->lock); + 
UNLOCK(&inode->lock); - if (event_gen1 != event_gen2) - need_refresh = _gf_true; + if (event_gen1 != event_gen2) + need_refresh = _gf_true; out: - return need_refresh; + return need_refresh; } - static int -afr_inode_need_refresh_set (inode_t *inode, xlator_t *this) +afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) { - int ret = -1; - afr_inode_ctx_t *ctx = NULL; + int ret = -1; + afr_inode_ctx_t *ctx = NULL; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK(&inode->lock); - { - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret) - goto unlock; + LOCK(&inode->lock); + { + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret) + goto unlock; - ctx->need_refresh = _gf_true; - } + ctx->need_refresh = _gf_true; + } unlock: - UNLOCK(&inode->lock); + UNLOCK(&inode->lock); out: - return ret; + return ret; } int -afr_inode_event_gen_reset (inode_t *inode, xlator_t *this) +afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK(&inode->lock); - { - ret = __afr_inode_event_gen_reset (inode, this); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __afr_inode_event_gen_reset(inode, this); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } int -afr_spb_choice_timeout_cancel (xlator_t *this, inode_t *inode) +afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode) { - afr_inode_ctx_t *ctx = NULL; - int ret = -1; + afr_inode_ctx_t *ctx = NULL; + int ret = -1; - if (!inode) - return ret; + if (!inode) + return ret; - LOCK(&inode->lock); - { - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret < 0 || !ctx) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, - "Failed to cancel split-brain choice timer."); - goto out; - } - ctx->spb_choice = -1; - if (ctx->timer) { - gf_timer_call_cancel (this->ctx, ctx->timer); - ctx->timer = NULL; - } - ret = 0; + LOCK(&inode->lock); + { + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret < 0 || !ctx) { + gf_msg(this->name, GF_LOG_WARNING, 0, + AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, + "Failed to cancel split-brain choice timer."); + goto out; + } + ctx->spb_choice = -1; + if (ctx->timer) { + gf_timer_call_cancel(this->ctx, ctx->timer); + ctx->timer = NULL; } + ret = 0; + } out: - UNLOCK(&inode->lock); - return ret; + UNLOCK(&inode->lock); + return ret; } void -afr_set_split_brain_choice_cbk (void *data) +afr_set_split_brain_choice_cbk(void *data) { - inode_t *inode = data; - xlator_t *this = THIS; + inode_t *inode = data; + xlator_t *this = THIS; - afr_spb_choice_timeout_cancel (this, inode); - inode_invalidate (inode); - inode_unref (inode); - return; + afr_spb_choice_timeout_cancel(this, inode); + inode_invalidate(inode); + inode_unref(inode); + return; } - int -afr_set_split_brain_choice (int ret, call_frame_t *frame, void *opaque) -{ - int op_errno = ENOMEM; - afr_private_t *priv = NULL; - afr_inode_ctx_t *ctx = NULL; - inode_t *inode = NULL; - loc_t *loc = NULL; - xlator_t *this = NULL; - afr_spbc_timeout_t *data = opaque; - struct timespec delta = {0, }; - gf_boolean_t timer_set = _gf_false; - gf_boolean_t timer_cancelled = _gf_false; - gf_boolean_t timer_reset = _gf_false; - gf_boolean_t need_invalidate = _gf_true; - int old_spb_choice = -1; - - frame = data->frame; - loc = data->loc; - this = frame->this; - priv = this->private; - +afr_set_split_brain_choice(int ret, call_frame_t *frame, void 
*opaque) +{ + int op_errno = ENOMEM; + afr_private_t *priv = NULL; + afr_inode_ctx_t *ctx = NULL; + inode_t *inode = NULL; + loc_t *loc = NULL; + xlator_t *this = NULL; + afr_spbc_timeout_t *data = opaque; + struct timespec delta = { + 0, + }; + gf_boolean_t timer_set = _gf_false; + gf_boolean_t timer_cancelled = _gf_false; + gf_boolean_t timer_reset = _gf_false; + gf_boolean_t need_invalidate = _gf_true; + int old_spb_choice = -1; + + frame = data->frame; + loc = data->loc; + this = frame->this; + priv = this->private; + + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + + delta.tv_sec = priv->spb_choice_timeout; + delta.tv_nsec = 0; + + if (!loc->inode) { + ret = -1; + op_errno = EINVAL; + goto out; + } + + if (!(data->d_spb || data->m_spb)) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, + "Cannot set " + "replica.split-brain-choice on %s. File is" + " not in data/metadata split-brain.", + uuid_utoa(loc->gfid)); + ret = -1; + op_errno = EINVAL; + goto out; + } + + /* + * we're ref'ing the inode before LOCK like it is done elsewhere in the + * code. If we ref after LOCK, coverity complains of possible deadlocks. + */ + inode = inode_ref(loc->inode); + + LOCK(&inode->lock); + { + ret = __afr_inode_ctx_get(this, inode, &ctx); if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } - - delta.tv_sec = priv->spb_choice_timeout; - delta.tv_nsec = 0; - - if (!loc->inode) { - ret = -1; - op_errno = EINVAL; - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, + "Failed to get inode_ctx for %s", loc->name); + goto unlock; } - if (!(data->d_spb || data->m_spb)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, "Cannot set " - "replica.split-brain-choice on %s. File is" - " not in data/metadata split-brain.", - uuid_utoa (loc->gfid)); - ret = -1; - op_errno = EINVAL; - goto out; - } + old_spb_choice = ctx->spb_choice; + ctx->spb_choice = data->spb_child_index; - /* - * we're ref'ing the inode before LOCK like it is done elsewhere in the - * code. If we ref after LOCK, coverity complains of possible deadlocks. + /* Possible changes in spb-choice : + * valid to -1 : cancel timer and unref + * valid to valid : cancel timer and inject new one + * -1 to -1 : unref and do not do anything + * -1 to valid : inject timer */ - inode = inode_ref (loc->inode); - LOCK(&inode->lock); - { - ret = __afr_inode_ctx_get (this, inode, &ctx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, - "Failed to get inode_ctx for %s", loc->name); - goto unlock; + /* ctx->timer is NULL iff previous value of + * ctx->spb_choice is -1 + */ + if (ctx->timer) { + if (ctx->spb_choice == -1) { + if (!gf_timer_call_cancel(this->ctx, ctx->timer)) { + ctx->timer = NULL; + timer_cancelled = _gf_true; } - - old_spb_choice = ctx->spb_choice; - ctx->spb_choice = data->spb_child_index; - - /* Possible changes in spb-choice : - * valid to -1 : cancel timer and unref - * valid to valid : cancel timer and inject new one - * -1 to -1 : unref and do not do anything - * -1 to valid : inject timer + /* If timer cancel failed here it means that the + * previous cbk will be executed which will set + * spb_choice to -1. So we can consider the + * 'valid to -1' case to be a success + * (i.e. ret = 0) and goto unlock. 
*/ - - /* ctx->timer is NULL iff previous value of - * ctx->spb_choice is -1 - */ - if (ctx->timer) { - if (ctx->spb_choice == -1) { - if (!gf_timer_call_cancel (this->ctx, - ctx->timer)) { - ctx->timer = NULL; - timer_cancelled = _gf_true; - } - /* If timer cancel failed here it means that the - * previous cbk will be executed which will set - * spb_choice to -1. So we can consider the - * 'valid to -1' case to be a success - * (i.e. ret = 0) and goto unlock. - */ - goto unlock; - } - goto reset_timer; - } else { - if (ctx->spb_choice == -1) - goto unlock; - goto set_timer; - } - -reset_timer: - ret = gf_timer_call_cancel (this->ctx, ctx->timer); - if (ret != 0) { - /* We need to bail out now instead of launching a new - * timer. Otherwise the cbk of the previous timer event - * will cancel the new ctx->timer. - */ - ctx->spb_choice = old_spb_choice; - ret = -1; - op_errno = EAGAIN; - goto unlock; - } - ctx->timer = NULL; - timer_reset = _gf_true; - -set_timer: - ctx->timer = gf_timer_call_after (this->ctx, delta, - afr_set_split_brain_choice_cbk, - inode); - if (!ctx->timer) { - ctx->spb_choice = old_spb_choice; - ret = -1; - op_errno = ENOMEM; - } - if (!timer_reset && ctx->timer) - timer_set = _gf_true; - if (timer_reset && !ctx->timer) - timer_cancelled = _gf_true; - need_invalidate = _gf_false; + goto unlock; + } + goto reset_timer; + } else { + if (ctx->spb_choice == -1) + goto unlock; + goto set_timer; } + + reset_timer: + ret = gf_timer_call_cancel(this->ctx, ctx->timer); + if (ret != 0) { + /* We need to bail out now instead of launching a new + * timer. Otherwise the cbk of the previous timer event + * will cancel the new ctx->timer. + */ + ctx->spb_choice = old_spb_choice; + ret = -1; + op_errno = EAGAIN; + goto unlock; + } + ctx->timer = NULL; + timer_reset = _gf_true; + + set_timer: + ctx->timer = gf_timer_call_after(this->ctx, delta, + afr_set_split_brain_choice_cbk, inode); + if (!ctx->timer) { + ctx->spb_choice = old_spb_choice; + ret = -1; + op_errno = ENOMEM; + } + if (!timer_reset && ctx->timer) + timer_set = _gf_true; + if (timer_reset && !ctx->timer) + timer_cancelled = _gf_true; + need_invalidate = _gf_false; + } unlock: - UNLOCK(&inode->lock); - if (!timer_set) - inode_unref (inode); - if (timer_cancelled) - inode_unref (inode); - /* - * We need to invalidate the inode to prevent the kernel from serving - * reads from an older cached value despite a change in spb_choice to - * a new value. - */ - if (need_invalidate) - inode_invalidate (inode); + UNLOCK(&inode->lock); + if (!timer_set) + inode_unref(inode); + if (timer_cancelled) + inode_unref(inode); + /* + * We need to invalidate the inode to prevent the kernel from serving + * reads from an older cached value despite a change in spb_choice to + * a new value. 
+ */ + if (need_invalidate) + inode_invalidate(inode); out: - GF_FREE (data); - AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL); - return 0; + GF_FREE(data); + AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL); + return 0; } int -afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused, - afr_transaction_type type) +afr_accused_fill(xlator_t *this, dict_t *xdata, unsigned char *accused, + afr_transaction_type type) { - afr_private_t *priv = NULL; - int i = 0; - int idx = afr_index_for_transaction_type (type); - void *pending_raw = NULL; - int pending[3]; - int ret = 0; + afr_private_t *priv = NULL; + int i = 0; + int idx = afr_index_for_transaction_type(type); + void *pending_raw = NULL; + int pending[3]; + int ret = 0; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - ret = dict_get_ptr (xdata, priv->pending_key[i], - &pending_raw); - if (ret) /* no pending flags */ - continue; - memcpy (pending, pending_raw, sizeof(pending)); + for (i = 0; i < priv->child_count; i++) { + ret = dict_get_ptr(xdata, priv->pending_key[i], &pending_raw); + if (ret) /* no pending flags */ + continue; + memcpy(pending, pending_raw, sizeof(pending)); - if (ntoh32 (pending[idx])) - accused[i] = 1; - } + if (ntoh32(pending[idx])) + accused[i] = 1; + } - return 0; + return 0; } int -afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies, - unsigned char *data_accused) +afr_accuse_smallfiles(xlator_t *this, struct afr_reply *replies, + unsigned char *data_accused) { - int i = 0; - afr_private_t *priv = NULL; - uint64_t maxsize = 0; + int i = 0; + afr_private_t *priv = NULL; + uint64_t maxsize = 0; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid && replies[i].xdata && - dict_get (replies[i].xdata, GLUSTERFS_BAD_INODE)) - continue; - if (data_accused[i]) - continue; - if (replies[i].poststat.ia_size > maxsize) - maxsize = replies[i].poststat.ia_size; - } + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid && replies[i].xdata && + dict_get(replies[i].xdata, GLUSTERFS_BAD_INODE)) + continue; + if (data_accused[i]) + continue; + if (replies[i].poststat.ia_size > maxsize) + maxsize = replies[i].poststat.ia_size; + } - for (i = 0; i < priv->child_count; i++) { - if (data_accused[i]) - continue; - if (AFR_IS_ARBITER_BRICK(priv, i)) - continue; - if (replies[i].poststat.ia_size < maxsize) - data_accused[i] = 1; - } + for (i = 0; i < priv->child_count; i++) { + if (data_accused[i]) + continue; + if (AFR_IS_ARBITER_BRICK(priv, i)) + continue; + if (replies[i].poststat.ia_size < maxsize) + data_accused[i] = 1; + } - return 0; + return 0; } int -afr_readables_fill (call_frame_t *frame, xlator_t *this, inode_t *inode, - unsigned char *data_accused, - unsigned char *metadata_accused, - unsigned char *data_readable, - unsigned char *metadata_readable, - struct afr_reply *replies) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - dict_t *xdata = NULL; - int i = 0; - int ret = 0; - ia_type_t ia_type = IA_INVAL; - - local = frame->local; - priv = this->private; +afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode, + unsigned char *data_accused, unsigned char *metadata_accused, + unsigned char *data_readable, + unsigned char *metadata_readable, struct afr_reply *replies) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + int i = 0; + int ret = 0; + ia_type_t ia_type = IA_INVAL; + + local = frame->local; + 
priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + data_readable[i] = 1; + metadata_readable[i] = 1; + } + if (AFR_IS_ARBITER_BRICK(priv, ARBITER_BRICK_INDEX)) { + data_readable[ARBITER_BRICK_INDEX] = 0; + metadata_readable[ARBITER_BRICK_INDEX] = 0; + } + + for (i = 0; i < priv->child_count; i++) { + if (replies) { /* Lookup */ + if (!replies[i].valid || replies[i].op_ret == -1 || + (replies[i].xdata && + dict_get(replies[i].xdata, GLUSTERFS_BAD_INODE))) { + data_readable[i] = 0; + metadata_readable[i] = 0; + continue; + } - for (i = 0; i < priv->child_count; i++) { - data_readable[i] = 1; - metadata_readable[i] = 1; - } - if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) { - data_readable[ARBITER_BRICK_INDEX] = 0; - metadata_readable[ARBITER_BRICK_INDEX] = 0; + xdata = replies[i].xdata; + ia_type = replies[i].poststat.ia_type; + } else { /* pre-op xattrop */ + xdata = local->transaction.changelog_xdata[i]; + ia_type = inode->ia_type; } - for (i = 0; i < priv->child_count; i++) { - if (replies) {/* Lookup */ - if (!replies[i].valid || replies[i].op_ret == -1 || - (replies[i].xdata && dict_get (replies[i].xdata, - GLUSTERFS_BAD_INODE))) { - data_readable[i] = 0; - metadata_readable[i] = 0; - continue; - } - - xdata = replies[i].xdata; - ia_type = replies[i].poststat.ia_type; - } else {/* pre-op xattrop */ - xdata = local->transaction.changelog_xdata[i]; - ia_type = inode->ia_type; - } + afr_accused_fill(this, xdata, data_accused, + (ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION + : AFR_DATA_TRANSACTION); - afr_accused_fill (this, xdata, data_accused, - (ia_type == IA_IFDIR) ? - AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION); + afr_accused_fill(this, xdata, metadata_accused, + AFR_METADATA_TRANSACTION); + } - afr_accused_fill (this, xdata, - metadata_accused, AFR_METADATA_TRANSACTION); - } + if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR && + /* We want to accuse small files only when we know for + * sure that there is no IO happening. Otherwise, the + * ia_sizes obtained in post-refresh replies may + * mismatch due to a race between inode-refresh and + * ongoing writes, causing spurious heal launches*/ + !afr_is_possibly_under_txn(AFR_DATA_TRANSACTION, local, this)) { + afr_accuse_smallfiles(this, replies, data_accused); + } - if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR && - /* We want to accuse small files only when we know for - * sure that there is no IO happening. 
Otherwise, the - * ia_sizes obtained in post-refresh replies may - * mismatch due to a race between inode-refresh and - * ongoing writes, causing spurious heal launches*/ - !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this)) { - afr_accuse_smallfiles (this, replies, data_accused); + for (i = 0; i < priv->child_count; i++) { + if (data_accused[i]) { + data_readable[i] = 0; + ret = 1; } - - for (i = 0; i < priv->child_count; i++) { - if (data_accused[i]) { - data_readable[i] = 0; - ret = 1; - } - if (metadata_accused[i]) { - metadata_readable[i] = 0; - ret = 1; - } + if (metadata_accused[i]) { + metadata_readable[i] = 0; + ret = 1; } - return ret; + } + return ret; } int -afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode, - gf_boolean_t *start_heal) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - struct afr_reply *replies = NULL; - int event_generation = 0; - int i = 0; - unsigned char *data_accused = NULL; - unsigned char *metadata_accused = NULL; - unsigned char *data_readable = NULL; - unsigned char *metadata_readable = NULL; - int ret = 0; - - local = frame->local; - priv = this->private; - replies = local->replies; - event_generation = local->event_generation; - - data_accused = alloca0 (priv->child_count); - data_readable = alloca0 (priv->child_count); - metadata_accused = alloca0 (priv->child_count); - metadata_readable = alloca0 (priv->child_count); - - ret = afr_readables_fill (frame, this, inode, data_accused, - metadata_accused, data_readable, - metadata_readable, replies); - - for (i = 0; i < priv->child_count; i++) { - if (start_heal && priv->child_up[i] && - (data_accused[i] || metadata_accused[i])) { - *start_heal = _gf_true; - break; - } - } - afr_inode_read_subvol_set (inode, this, data_readable, - metadata_readable, event_generation); - return ret; +afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode, + gf_boolean_t *start_heal) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + struct afr_reply *replies = NULL; + int event_generation = 0; + int i = 0; + unsigned char *data_accused = NULL; + unsigned char *metadata_accused = NULL; + unsigned char *data_readable = NULL; + unsigned char *metadata_readable = NULL; + int ret = 0; + + local = frame->local; + priv = this->private; + replies = local->replies; + event_generation = local->event_generation; + + data_accused = alloca0(priv->child_count); + data_readable = alloca0(priv->child_count); + metadata_accused = alloca0(priv->child_count); + metadata_readable = alloca0(priv->child_count); + + ret = afr_readables_fill(frame, this, inode, data_accused, metadata_accused, + data_readable, metadata_readable, replies); + + for (i = 0; i < priv->child_count; i++) { + if (start_heal && priv->child_up[i] && + (data_accused[i] || metadata_accused[i])) { + *start_heal = _gf_true; + break; + } + } + afr_inode_read_subvol_set(inode, this, data_readable, metadata_readable, + event_generation); + return ret; } int -afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque) +afr_refresh_selfheal_done(int ret, call_frame_t *heal, void *opaque) { - if (heal) - AFR_STACK_DESTROY (heal); - return 0; + if (heal) + AFR_STACK_DESTROY(heal); + return 0; } int -afr_inode_refresh_err (call_frame_t *frame, xlator_t *this) +afr_inode_refresh_err(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int err = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + int err = 0; - 
local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].valid && !local->replies[i].op_ret) { - err = 0; - goto ret; - } - } + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].valid && !local->replies[i].op_ret) { + err = 0; + goto ret; + } + } - err = afr_final_errno (local, priv); + err = afr_final_errno(local, priv); ret: - return err; + return err; } gf_boolean_t -afr_selfheal_enabled (xlator_t *this) +afr_selfheal_enabled(xlator_t *this) { - afr_private_t *priv = NULL; - gf_boolean_t data = _gf_false; - int ret = 0; + afr_private_t *priv = NULL; + gf_boolean_t data = _gf_false; + int ret = 0; - priv = this->private; + priv = this->private; - ret = gf_string2boolean (priv->data_self_heal, &data); - GF_ASSERT (!ret); + ret = gf_string2boolean(priv->data_self_heal, &data); + GF_ASSERT(!ret); - return data || priv->metadata_self_heal || priv->entry_self_heal; + return data || priv->metadata_self_heal || priv->entry_self_heal; } - int -afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) -{ - - call_frame_t *heal_frame = NULL; - afr_local_t *heal_local = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - inode_t *inode = NULL; - int event_generation = 0; - int read_subvol = -1; - int op_errno = ENOMEM; - int ret = 0; - - local = frame->local; - inode = local->inode; - priv = this->private; - - if (err) - goto refresh_done; - - if (local->op == GF_FOP_LOOKUP) - goto refresh_done; - - ret = afr_inode_get_readable (frame, inode, this, local->readable, - &event_generation, - local->transaction.type); - - if (ret == -EIO || (local->is_read_txn && !event_generation)) { - /* No readable subvolume even after refresh ==> splitbrain.*/ - if (!priv->fav_child_policy) { - err = EIO; - goto refresh_done; - } - read_subvol = afr_sh_get_fav_by_policy (this, local->replies, - inode, NULL); - if (read_subvol == -1) { - err = EIO; - goto refresh_done; - } - - heal_frame = copy_frame (frame); - if (!heal_frame) { - err = EIO; - goto refresh_done; - } - heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; - heal_local = AFR_FRAME_INIT (heal_frame, op_errno); - if (!heal_local) { - err = EIO; - AFR_STACK_DESTROY (heal_frame); - goto refresh_done; - } - heal_local->xdata_req = dict_new(); - if (!heal_local->xdata_req) { - err = EIO; - AFR_STACK_DESTROY (heal_frame); - goto refresh_done; - } - heal_local->heal_frame = frame; - ret = synctask_new (this->ctx->env, - afr_fav_child_reset_sink_xattrs, - afr_fav_child_reset_sink_xattrs_cbk, - heal_frame, - heal_frame); - return 0; - } +afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) +{ + call_frame_t *heal_frame = NULL; + afr_local_t *heal_local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + int event_generation = 0; + int read_subvol = -1; + int op_errno = ENOMEM; + int ret = 0; + + local = frame->local; + inode = local->inode; + priv = this->private; + + if (err) + goto refresh_done; + + if (local->op == GF_FOP_LOOKUP) + goto refresh_done; + + ret = afr_inode_get_readable(frame, inode, this, local->readable, + &event_generation, local->transaction.type); + + if (ret == -EIO || (local->is_read_txn && !event_generation)) { + /* No readable subvolume even after refresh ==> splitbrain.*/ + if (!priv->fav_child_policy) { + err = EIO; + goto refresh_done; + } + read_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode, + NULL); + if (read_subvol == 
-1) { + err = EIO; + goto refresh_done; + } + + heal_frame = copy_frame(frame); + if (!heal_frame) { + err = EIO; + goto refresh_done; + } + heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + heal_local = AFR_FRAME_INIT(heal_frame, op_errno); + if (!heal_local) { + err = EIO; + AFR_STACK_DESTROY(heal_frame); + goto refresh_done; + } + heal_local->xdata_req = dict_new(); + if (!heal_local->xdata_req) { + err = EIO; + AFR_STACK_DESTROY(heal_frame); + goto refresh_done; + } + heal_local->heal_frame = frame; + ret = synctask_new(this->ctx->env, afr_fav_child_reset_sink_xattrs, + afr_fav_child_reset_sink_xattrs_cbk, heal_frame, + heal_frame); + return 0; + } refresh_done: - afr_local_replies_wipe (local, this->private); - local->refreshfn (frame, this, err); + afr_local_replies_wipe(local, this->private); + local->refreshfn(frame, this, err); - return 0; + return 0; } static void -afr_fill_success_replies (afr_local_t *local, afr_private_t *priv, - unsigned char *replies) +afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies) { - int i = 0; + int i = 0; - for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].valid && local->replies[i].op_ret == 0) - replies[i] = 1; - } + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].valid && local->replies[i].op_ret == 0) + replies[i] = 1; + } } int -afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error) -{ - call_frame_t *heal_frame = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - gf_boolean_t start_heal = _gf_false; - afr_local_t *heal_local = NULL; - unsigned char *success_replies = NULL; - int op_errno = ENOMEM; - int ret = 0; - - if (error != 0) { - goto refresh_done; - } - - local = frame->local; - priv = this->private; - success_replies = alloca0 (priv->child_count); - afr_fill_success_replies (local, priv, success_replies); - - if (!afr_has_quorum (success_replies, this)) { - error = afr_final_errno (frame->local, this->private); - if (!error) - error = afr_quorum_errno(priv); - goto refresh_done; - } - - if (priv->thin_arbiter_count && local->is_read_txn && - AFR_COUNT (success_replies, priv->child_count) != - priv->child_count) { - /* We need to query the good bricks and/or thin-arbiter.*/ - error = EINVAL; - goto refresh_done; - } - - ret = afr_replies_interpret (frame, this, local->refreshinode, - &start_heal); - - if (ret && afr_selfheal_enabled (this) && start_heal) { - heal_frame = copy_frame (frame); - if (!heal_frame) - goto refresh_done; - heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; - heal_local = AFR_FRAME_INIT (heal_frame, op_errno); - if (!heal_local) { - AFR_STACK_DESTROY (heal_frame); - goto refresh_done; - } - heal_local->refreshinode = inode_ref (local->refreshinode); - heal_local->heal_frame = heal_frame; - if (!afr_throttled_selfheal (heal_frame, this)) { - AFR_STACK_DESTROY (heal_frame); - goto refresh_done; - } - } +afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error) +{ + call_frame_t *heal_frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + gf_boolean_t start_heal = _gf_false; + afr_local_t *heal_local = NULL; + unsigned char *success_replies = NULL; + int op_errno = ENOMEM; + int ret = 0; + + if (error != 0) { + goto refresh_done; + } + + local = frame->local; + priv = this->private; + success_replies = alloca0(priv->child_count); + afr_fill_success_replies(local, priv, success_replies); + + if (!afr_has_quorum(success_replies, this)) { + error = afr_final_errno(frame->local, 
this->private); + if (!error) + error = afr_quorum_errno(priv); + goto refresh_done; + } + + if (priv->thin_arbiter_count && local->is_read_txn && + AFR_COUNT(success_replies, priv->child_count) != priv->child_count) { + /* We need to query the good bricks and/or thin-arbiter.*/ + error = EINVAL; + goto refresh_done; + } + + ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal); + + if (ret && afr_selfheal_enabled(this) && start_heal) { + heal_frame = copy_frame(frame); + if (!heal_frame) + goto refresh_done; + heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + heal_local = AFR_FRAME_INIT(heal_frame, op_errno); + if (!heal_local) { + AFR_STACK_DESTROY(heal_frame); + goto refresh_done; + } + heal_local->refreshinode = inode_ref(local->refreshinode); + heal_local->heal_frame = heal_frame; + if (!afr_throttled_selfheal(heal_frame, this)) { + AFR_STACK_DESTROY(heal_frame); + goto refresh_done; + } + } refresh_done: - afr_txn_refresh_done (frame, this, error); + afr_txn_refresh_done(frame, this, error); - return 0; + return 0; } void -afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *buf, - dict_t *xdata, struct iatt *par) -{ - afr_local_t *local = NULL; - int call_child = (long) cookie; - int8_t need_heal = 1; - int call_count = 0; - int ret = 0; - - local = frame->local; - local->replies[call_child].valid = 1; - local->replies[call_child].op_ret = op_ret; - local->replies[call_child].op_errno = op_errno; - if (op_ret != -1) { - local->replies[call_child].poststat = *buf; - if (par) - local->replies[call_child].postparent = *par; - if (xdata) - local->replies[call_child].xdata = dict_ref (xdata); - } - if (xdata) { - ret = dict_get_int8 (xdata, "link-count", &need_heal); - local->replies[call_child].need_heal = need_heal; - } else { - local->replies[call_child].need_heal = need_heal; - } - - call_count = afr_frame_return (frame); - if (call_count == 0) { - afr_set_need_heal (this, local); - ret = afr_inode_refresh_err (frame, this); - afr_inode_refresh_done (frame, this, ret); - } - +afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *buf, + dict_t *xdata, struct iatt *par) +{ + afr_local_t *local = NULL; + int call_child = (long)cookie; + int8_t need_heal = 1; + int call_count = 0; + int ret = 0; + + local = frame->local; + local->replies[call_child].valid = 1; + local->replies[call_child].op_ret = op_ret; + local->replies[call_child].op_errno = op_errno; + if (op_ret != -1) { + local->replies[call_child].poststat = *buf; + if (par) + local->replies[call_child].postparent = *par; + if (xdata) + local->replies[call_child].xdata = dict_ref(xdata); + } + if (xdata) { + ret = dict_get_int8(xdata, "link-count", &need_heal); + local->replies[call_child].need_heal = need_heal; + } else { + local->replies[call_child].need_heal = need_heal; + } + + call_count = afr_frame_return(frame); + if (call_count == 0) { + afr_set_need_heal(this, local); + ret = afr_inode_refresh_err(frame, this); + afr_inode_refresh_done(frame, this, ret); + } } int -afr_inode_refresh_subvol_with_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, - int op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *par) +afr_inode_refresh_subvol_with_lookup_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, + int op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *par) { - 
afr_inode_refresh_subvol_cbk (frame, cookie, this, op_ret, op_errno, - buf, xdata, par); - return 0; + afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf, + xdata, par); + return 0; } - int -afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this, - int i, inode_t *inode, uuid_t gfid, - dict_t *xdata) +afr_inode_refresh_subvol_with_lookup(call_frame_t *frame, xlator_t *this, int i, + inode_t *inode, uuid_t gfid, dict_t *xdata) { - loc_t loc = {0, }; - afr_private_t *priv = NULL; + loc_t loc = { + 0, + }; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - loc.inode = inode; - if (gf_uuid_is_null (inode->gfid) && gfid) { - /* To handle setattr/setxattr on yet to be linked inode from - * dht */ - gf_uuid_copy (loc.gfid, gfid); - } else { - gf_uuid_copy (loc.gfid, inode->gfid); - } + loc.inode = inode; + if (gf_uuid_is_null(inode->gfid) && gfid) { + /* To handle setattr/setxattr on yet to be linked inode from + * dht */ + gf_uuid_copy(loc.gfid, gfid); + } else { + gf_uuid_copy(loc.gfid, inode->gfid); + } - STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_with_lookup_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->lookup, &loc, xdata); - return 0; + STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_lookup_cbk, + (void *)(long)i, priv->children[i], + priv->children[i]->fops->lookup, &loc, xdata); + return 0; } int -afr_inode_refresh_subvol_with_fstat_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *buf, dict_t *xdata) +afr_inode_refresh_subvol_with_fstat_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - afr_inode_refresh_subvol_cbk (frame, cookie, this, op_ret, op_errno, - buf, xdata, NULL); - return 0; + afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf, + xdata, NULL); + return 0; } int -afr_inode_refresh_subvol_with_fstat (call_frame_t *frame, xlator_t *this, int i, - dict_t *xdata) +afr_inode_refresh_subvol_with_fstat(call_frame_t *frame, xlator_t *this, int i, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_with_fstat_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->fstat, local->fd, xdata); - return 0; + STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_fstat_cbk, + (void *)(long)i, priv->children[i], + priv->children[i]->fops->fstat, local->fd, xdata); + return 0; } int -afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int i = 0; - int ret = 0; - dict_t *xdata = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - unsigned char *wind_subvols = NULL; - - priv = this->private; - local = frame->local; - wind_subvols = alloca0 (priv->child_count); - - afr_local_replies_wipe (local, priv); - - if (local->fd) { - fd_ctx = afr_fd_ctx_get (local->fd, this); - if (!fd_ctx) { - afr_inode_refresh_done (frame, this, EINVAL); - return 0; - } - } +afr_inode_refresh_do(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int i = 0; + int ret = 0; + dict_t *xdata = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + unsigned char *wind_subvols = 
NULL; - xdata = dict_new (); - if (!xdata) { - afr_inode_refresh_done (frame, this, ENOMEM); - return 0; - } + priv = this->private; + local = frame->local; + wind_subvols = alloca0(priv->child_count); - ret = afr_xattr_req_prepare (this, xdata); - if (ret != 0) { - dict_unref (xdata); - afr_inode_refresh_done (frame, this, -ret); - return 0; - } + afr_local_replies_wipe(local, priv); - ret = dict_set_str (xdata, "link-count", GF_XATTROP_INDEX_COUNT); - if (ret) { - gf_msg_debug (this->name, -ret, - "Unable to set link-count in dict "); + if (local->fd) { + fd_ctx = afr_fd_ctx_get(local->fd, this); + if (!fd_ctx) { + afr_inode_refresh_done(frame, this, EINVAL); + return 0; } + } - ret = dict_set_str (xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name); - if (ret) { - gf_msg_debug (this->name, -ret, - "Unable to set inodelk-dom-count in dict "); + xdata = dict_new(); + if (!xdata) { + afr_inode_refresh_done(frame, this, ENOMEM); + return 0; + } - } + ret = afr_xattr_req_prepare(this, xdata); + if (ret != 0) { + dict_unref(xdata); + afr_inode_refresh_done(frame, this, -ret); + return 0; + } - if (local->fd) { - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i] && - fd_ctx->opened_on[i] == AFR_FD_OPENED) - wind_subvols[i] = 1; - } - } else { - memcpy (wind_subvols, local->child_up, - sizeof (*local->child_up) * priv->child_count); - } + ret = dict_set_str(xdata, "link-count", GF_XATTROP_INDEX_COUNT); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to set link-count in dict "); + } - local->call_count = AFR_COUNT (wind_subvols, priv->child_count); + ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name); + if (ret) { + gf_msg_debug(this->name, -ret, + "Unable to set inodelk-dom-count in dict "); + } - call_count = local->call_count; - if (!call_count) { - dict_unref (xdata); - if (local->fd && AFR_COUNT(local->child_up, priv->child_count)) - afr_inode_refresh_done (frame, this, EBADFD); - else - afr_inode_refresh_done (frame, this, ENOTCONN); - return 0; + if (local->fd) { + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i] && fd_ctx->opened_on[i] == AFR_FD_OPENED) + wind_subvols[i] = 1; } - for (i = 0; i < priv->child_count; i++) { - if (!wind_subvols[i]) - continue; + } else { + memcpy(wind_subvols, local->child_up, + sizeof(*local->child_up) * priv->child_count); + } + + local->call_count = AFR_COUNT(wind_subvols, priv->child_count); + + call_count = local->call_count; + if (!call_count) { + dict_unref(xdata); + if (local->fd && AFR_COUNT(local->child_up, priv->child_count)) + afr_inode_refresh_done(frame, this, EBADFD); + else + afr_inode_refresh_done(frame, this, ENOTCONN); + return 0; + } + for (i = 0; i < priv->child_count; i++) { + if (!wind_subvols[i]) + continue; - if (local->fd) - afr_inode_refresh_subvol_with_fstat (frame, this, i, - xdata); - else - afr_inode_refresh_subvol_with_lookup (frame, this, i, - local->refreshinode, - local->refreshgfid, xdata); + if (local->fd) + afr_inode_refresh_subvol_with_fstat(frame, this, i, xdata); + else + afr_inode_refresh_subvol_with_lookup( + frame, this, i, local->refreshinode, local->refreshgfid, xdata); - if (!--call_count) - break; - } + if (!--call_count) + break; + } - dict_unref (xdata); + dict_unref(xdata); - return 0; + return 0; } - int -afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, - uuid_t gfid, afr_inode_refresh_cbk_t refreshfn) +afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode, + uuid_t gfid, afr_inode_refresh_cbk_t refreshfn) { - 
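afr_inode_refresh_do() above decides which children to query: for an fd-based refresh only children on which the fd was actually opened, otherwise every child that is up, and it bails out with EBADFD or ENOTCONN when nothing qualifies. A small sketch of that selection, with pick_refresh_targets() as a hypothetical helper and opened_on reduced to a plain boolean array:

/* opened_on may be NULL to model the loc-based (non-fd) refresh path. */
static int
pick_refresh_targets(const unsigned char *child_up,
                     const unsigned char *opened_on, int child_count,
                     unsigned char *targets)
{
    int i;
    int count = 0;

    for (i = 0; i < child_count; i++) {
        targets[i] = child_up[i] && (opened_on == NULL || opened_on[i]);
        count += targets[i];
    }
    /* A zero return makes the caller fail: EBADFD when the fd is not
     * open anywhere despite children being up, ENOTCONN otherwise. */
    return count;
}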
afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - local->refreshfn = refreshfn; + local->refreshfn = refreshfn; - if (local->refreshinode) { - inode_unref (local->refreshinode); - local->refreshinode = NULL; - } + if (local->refreshinode) { + inode_unref(local->refreshinode); + local->refreshinode = NULL; + } - local->refreshinode = inode_ref (inode); + local->refreshinode = inode_ref(inode); - if (gfid) - gf_uuid_copy (local->refreshgfid, gfid); - else - gf_uuid_clear (local->refreshgfid); + if (gfid) + gf_uuid_copy(local->refreshgfid, gfid); + else + gf_uuid_clear(local->refreshgfid); - afr_inode_refresh_do (frame, this); + afr_inode_refresh_do(frame, this); - return 0; + return 0; } - int -afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req) +afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req) { - int i = 0; - afr_private_t *priv = NULL; - int ret = 0; + int i = 0; + afr_private_t *priv = NULL; + int ret = 0; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (xattr_req, priv->pending_key[i], - AFR_NUM_CHANGE_LOGS * sizeof(int)); - if (ret < 0) - gf_msg (this->name, GF_LOG_WARNING, - -ret, AFR_MSG_DICT_SET_FAILED, - "Unable to set dict value for %s", - priv->pending_key[i]); - /* 3 = data+metadata+entry */ - } - ret = dict_set_uint64 (xattr_req, AFR_DIRTY, - AFR_NUM_CHANGE_LOGS * sizeof(int)); - if (ret) { - gf_msg_debug (this->name, -ret, "failed to set dirty " - "query flag"); - } + priv = this->private; - ret = dict_set_int32 (xattr_req, "list-xattr", 1); - if (ret) { - gf_msg_debug (this->name, -ret, - "Unable to set list-xattr in dict "); - } + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64(xattr_req, priv->pending_key[i], + AFR_NUM_CHANGE_LOGS * sizeof(int)); + if (ret < 0) + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, + "Unable to set dict value for %s", priv->pending_key[i]); + /* 3 = data+metadata+entry */ + } + ret = dict_set_uint64(xattr_req, AFR_DIRTY, + AFR_NUM_CHANGE_LOGS * sizeof(int)); + if (ret) { + gf_msg_debug(this->name, -ret, + "failed to set dirty " + "query flag"); + } + + ret = dict_set_int32(xattr_req, "list-xattr", 1); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to set list-xattr in dict "); + } + + return ret; +} - return ret; +int +afr_lookup_xattr_req_prepare(afr_local_t *local, xlator_t *this, + dict_t *xattr_req, loc_t *loc) +{ + int ret = -ENOMEM; + + if (!local->xattr_req) + local->xattr_req = dict_new(); + + if (!local->xattr_req) + goto out; + + if (xattr_req && (xattr_req != local->xattr_req)) + dict_copy(xattr_req, local->xattr_req); + + ret = afr_xattr_req_prepare(this, local->xattr_req); + + ret = dict_set_uint64(local->xattr_req, GLUSTERFS_INODELK_COUNT, 0); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, + "%s: Unable to set dict value for %s", loc->path, + GLUSTERFS_INODELK_COUNT); + } + ret = dict_set_uint64(local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, + "%s: Unable to set dict value for %s", loc->path, + GLUSTERFS_ENTRYLK_COUNT); + } + + ret = dict_set_uint32(local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, + "%s: Unable to set dict value for %s", loc->path, + GLUSTERFS_PARENT_ENTRYLK); + } + + ret = dict_set_str(local->xattr_req, "link-count", GF_XATTROP_INDEX_COUNT); + if (ret) { 
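afr_xattr_req_prepare() above asks each brick for its pending-changelog xattr (priv->pending_key[i]) sized for AFR_NUM_CHANGE_LOGS 32-bit counters, plus the AFR_DIRTY key; the "3 = data+metadata+entry" comment gives the layout. A sketch of how such a blob can be inspected, assuming that data/metadata/entry ordering and network byte order (the real check is afr_is_pending_set() further down, which uses ntoh32); changelog_pending() is a hypothetical helper:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

enum { CLOG_DATA = 0, CLOG_METADATA = 1, CLOG_ENTRY = 2, CLOG_SLOTS = 3 };

/* Returns nonzero when the counter for the given transaction type is set. */
static int
changelog_pending(const void *raw, int type)
{
    uint32_t counters[CLOG_SLOTS];

    memcpy(counters, raw, sizeof(counters));
    return ntohl(counters[type]) != 0;
}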
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict "); + } + + ret = 0; +out: + return ret; } int -afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this, - dict_t *xattr_req, loc_t *loc) +afr_least_pending_reads_child(afr_private_t *priv) { - int ret = -ENOMEM; - - if (!local->xattr_req) - local->xattr_req = dict_new (); + int i = 0; + int child = 0; + int64_t read_iter = -1; + int64_t pending_read = -1; - if (!local->xattr_req) - goto out; + pending_read = GF_ATOMIC_GET(priv->pending_reads[0]); + for (i = 1; i < priv->child_count; i++) { + if (AFR_IS_ARBITER_BRICK(priv, i)) + continue; + read_iter = GF_ATOMIC_GET(priv->pending_reads[i]); + if (read_iter < pending_read) { + pending_read = read_iter; + child = i; + } + } - if (xattr_req && (xattr_req != local->xattr_req)) - dict_copy (xattr_req, local->xattr_req); + return child; +} - ret = afr_xattr_req_prepare (this, local->xattr_req); +int +afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv) +{ + uuid_t gfid_copy = { + 0, + }; + pid_t pid; + int child = -1; - ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, - -ret, AFR_MSG_DICT_SET_FAILED, - "%s: Unable to set dict value for %s", - loc->path, GLUSTERFS_INODELK_COUNT); - } - ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, - -ret, AFR_MSG_DICT_SET_FAILED, - "%s: Unable to set dict value for %s", - loc->path, GLUSTERFS_ENTRYLK_COUNT); - } - - ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, - -ret, AFR_MSG_DICT_SET_FAILED, - "%s: Unable to set dict value for %s", - loc->path, GLUSTERFS_PARENT_ENTRYLK); - } - - ret = dict_set_str (local->xattr_req, "link-count", - GF_XATTROP_INDEX_COUNT); - if (ret) { - gf_msg_debug (this->name, -ret, - "Unable to set link-count in dict "); - } - - ret = 0; -out: - return ret; -} - -int -afr_least_pending_reads_child (afr_private_t *priv) -{ - int i = 0; - int child = 0; - int64_t read_iter = -1; - int64_t pending_read = -1; - - pending_read = GF_ATOMIC_GET (priv->pending_reads[0]); - for (i = 1; i < priv->child_count; i++) { - if (AFR_IS_ARBITER_BRICK(priv, i)) - continue; - read_iter = GF_ATOMIC_GET(priv->pending_reads[i]); - if (read_iter < pending_read) { - pending_read = read_iter; - child = i; - } - } - - return child; -} - -int -afr_hash_child (afr_read_subvol_args_t *args, afr_private_t *priv) -{ - uuid_t gfid_copy = {0,}; - pid_t pid; - int child = -1; - - switch (priv->hash_mode) { + switch (priv->hash_mode) { case 0: - break; + break; case 1: - gf_uuid_copy (gfid_copy, args->gfid); - child = SuperFastHash((char *)gfid_copy, - sizeof(gfid_copy)) % priv->child_count; - break; + gf_uuid_copy(gfid_copy, args->gfid); + child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % + priv->child_count; + break; case 2: - if (args->ia_type != IA_IFDIR) { - /* - * Why getpid? Because it's one of the cheapest calls - * available - faster than gethostname etc. - and - * returns a constant-length value that's sure to be - * shorter than a UUID. It's still very unlikely to be - * the same across clients, so it still provides good - * mixing. We're not trying for perfection here. All we - * need is a low probability that multiple clients - * won't converge on the same subvolume. 
- */ - pid = getpid(); - memcpy (gfid_copy, &pid, sizeof(pid)); - } - child = SuperFastHash((char *)gfid_copy, - sizeof(gfid_copy)) % priv->child_count; - break; + if (args->ia_type != IA_IFDIR) { + /* + * Why getpid? Because it's one of the cheapest calls + * available - faster than gethostname etc. - and + * returns a constant-length value that's sure to be + * shorter than a UUID. It's still very unlikely to be + * the same across clients, so it still provides good + * mixing. We're not trying for perfection here. All we + * need is a low probability that multiple clients + * won't converge on the same subvolume. + */ + pid = getpid(); + memcpy(gfid_copy, &pid, sizeof(pid)); + } + child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % + priv->child_count; + break; case 3: - child = afr_least_pending_reads_child (priv); - break; - } + child = afr_least_pending_reads_child(priv); + break; + } - return child; + return child; } int -afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this, - unsigned char *readable, - afr_read_subvol_args_t *args) -{ - int i = 0; - int read_subvol = -1; - afr_private_t *priv = NULL; - afr_read_subvol_args_t local_args = {0,}; - - priv = this->private; - - /* first preference - explicitly specified or local subvolume */ - if (priv->read_child >= 0 && readable[priv->read_child]) - return priv->read_child; - - if (inode_is_linked (inode)) { - gf_uuid_copy (local_args.gfid, inode->gfid); - local_args.ia_type = inode->ia_type; - } else if (args) { - local_args = *args; - } +afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this, + unsigned char *readable, + afr_read_subvol_args_t *args) +{ + int i = 0; + int read_subvol = -1; + afr_private_t *priv = NULL; + afr_read_subvol_args_t local_args = { + 0, + }; - /* second preference - use hashed mode */ - read_subvol = afr_hash_child (&local_args, priv); - if (read_subvol >= 0 && readable[read_subvol]) - return read_subvol; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (readable[i]) - return i; - } + /* first preference - explicitly specified or local subvolume */ + if (priv->read_child >= 0 && readable[priv->read_child]) + return priv->read_child; - /* no readable subvolumes, either split brain or all subvols down */ + if (inode_is_linked(inode)) { + gf_uuid_copy(local_args.gfid, inode->gfid); + local_args.ia_type = inode->ia_type; + } else if (args) { + local_args = *args; + } - return -1; -} + /* second preference - use hashed mode */ + read_subvol = afr_hash_child(&local_args, priv); + if (read_subvol >= 0 && readable[read_subvol]) + return read_subvol; + for (i = 0; i < priv->child_count; i++) { + if (readable[i]) + return i; + } + + /* no readable subvolumes, either split brain or all subvols down */ + + return -1; +} int -afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this, - unsigned char *readable, int *event_p, - int type) +afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this, + unsigned char *readable, int *event_p, int type) { - int ret = -1; + int ret = -1; - if (type == AFR_METADATA_TRANSACTION) - ret = afr_inode_read_subvol_get (inode, this, 0, readable, - event_p); - else - ret = afr_inode_read_subvol_get (inode, this, readable, 0, - event_p); - return ret; + if (type == AFR_METADATA_TRANSACTION) + ret = afr_inode_read_subvol_get(inode, this, 0, readable, event_p); + else + ret = afr_inode_read_subvol_get(inode, this, readable, 0, event_p); + return ret; } void -afr_readables_intersect_get (inode_t *inode, xlator_t *this, int 
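afr_read_subvol_select_by_policy() above applies a fixed preference order: the configured or locally discovered read child, then the child picked by afr_hash_child() for the active hash mode, then the first readable child, and -1 when nothing is readable (split brain or all children down). A compact restatement with select_read_child() and its parameters as hypothetical names:

static int
select_read_child(int preferred, int hashed, const unsigned char *readable,
                  int child_count)
{
    int i;

    /* 1. explicitly configured or local child, if it is readable */
    if (preferred >= 0 && readable[preferred])
        return preferred;

    /* 2. the hashed child (by gfid, by pid, or least pending reads) */
    if (hashed >= 0 && readable[hashed])
        return hashed;

    /* 3. any readable child at all */
    for (i = 0; i < child_count; i++)
        if (readable[i])
            return i;

    return -1; /* split brain or every child down */
}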
*event, - unsigned char *intersection) +afr_readables_intersect_get(inode_t *inode, xlator_t *this, int *event, + unsigned char *intersection) { - afr_private_t *priv = NULL; - unsigned char *data_readable = NULL; - unsigned char *metadata_readable = NULL; - unsigned char *intersect = NULL; + afr_private_t *priv = NULL; + unsigned char *data_readable = NULL; + unsigned char *metadata_readable = NULL; + unsigned char *intersect = NULL; - priv = this->private; - data_readable = alloca0 (priv->child_count); - metadata_readable = alloca0 (priv->child_count); - intersect = alloca0 (priv->child_count); + priv = this->private; + data_readable = alloca0(priv->child_count); + metadata_readable = alloca0(priv->child_count); + intersect = alloca0(priv->child_count); - afr_inode_read_subvol_get (inode, this, data_readable, - metadata_readable, event); + afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable, + event); - AFR_INTERSECT (intersect, data_readable, metadata_readable, - priv->child_count); - if (intersection) - memcpy (intersection, intersect, - sizeof (*intersection) * priv->child_count); + AFR_INTERSECT(intersect, data_readable, metadata_readable, + priv->child_count); + if (intersection) + memcpy(intersection, intersect, + sizeof(*intersection) * priv->child_count); } int -afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, - unsigned char *readables, - int *event_p, afr_transaction_type type, - afr_read_subvol_args_t *args) +afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p, + unsigned char *readables, int *event_p, + afr_transaction_type type, afr_read_subvol_args_t *args) { - afr_private_t *priv = NULL; - unsigned char *readable = NULL; - unsigned char *intersection = NULL; - int subvol = -1; - int event = 0; + afr_private_t *priv = NULL; + unsigned char *readable = NULL; + unsigned char *intersection = NULL; + int subvol = -1; + int event = 0; - priv = this->private; + priv = this->private; - readable = alloca0 (priv->child_count); - intersection = alloca0 (priv->child_count); + readable = alloca0(priv->child_count); + intersection = alloca0(priv->child_count); - afr_inode_read_subvol_type_get (inode, this, readable, &event, type); + afr_inode_read_subvol_type_get(inode, this, readable, &event, type); - afr_readables_intersect_get (inode, this, &event, intersection); + afr_readables_intersect_get(inode, this, &event, intersection); - if (AFR_COUNT (intersection, priv->child_count) > 0) - subvol = afr_read_subvol_select_by_policy (inode, this, - intersection, args); - else - subvol = afr_read_subvol_select_by_policy (inode, this, - readable, args); - if (subvol_p) - *subvol_p = subvol; - if (event_p) - *event_p = event; - if (readables) - memcpy (readables, readable, - sizeof (*readables) * priv->child_count); - return subvol; + if (AFR_COUNT(intersection, priv->child_count) > 0) + subvol = afr_read_subvol_select_by_policy(inode, this, intersection, + args); + else + subvol = afr_read_subvol_select_by_policy(inode, this, readable, args); + if (subvol_p) + *subvol_p = subvol; + if (event_p) + *event_p = event; + if (readables) + memcpy(readables, readable, sizeof(*readables) * priv->child_count); + return subvol; } - void -afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) +afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this) { - afr_private_t *priv = NULL; - int i = 0; + afr_private_t *priv = NULL; + int i = 0; - priv = this->private; + priv = this->private; - afr_matrix_cleanup (local->pending, 
priv->child_count); + afr_matrix_cleanup(local->pending, priv->child_count); - GF_FREE (local->internal_lock.locked_nodes); + GF_FREE(local->internal_lock.locked_nodes); - GF_FREE (local->internal_lock.lower_locked_nodes); + GF_FREE(local->internal_lock.lower_locked_nodes); - afr_entry_lockee_cleanup (&local->internal_lock); + afr_entry_lockee_cleanup(&local->internal_lock); - GF_FREE (local->transaction.pre_op); + GF_FREE(local->transaction.pre_op); - GF_FREE (local->transaction.pre_op_sources); - if (local->transaction.changelog_xdata) { - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.changelog_xdata[i]) - continue; - dict_unref (local->transaction.changelog_xdata[i]); - } - GF_FREE (local->transaction.changelog_xdata); + GF_FREE(local->transaction.pre_op_sources); + if (local->transaction.changelog_xdata) { + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.changelog_xdata[i]) + continue; + dict_unref(local->transaction.changelog_xdata[i]); } + GF_FREE(local->transaction.changelog_xdata); + } - GF_FREE (local->transaction.failed_subvols); - - GF_FREE (local->transaction.basename); - GF_FREE (local->transaction.new_basename); + GF_FREE(local->transaction.failed_subvols); - loc_wipe (&local->transaction.parent_loc); - loc_wipe (&local->transaction.new_parent_loc); + GF_FREE(local->transaction.basename); + GF_FREE(local->transaction.new_basename); + loc_wipe(&local->transaction.parent_loc); + loc_wipe(&local->transaction.new_parent_loc); } void -afr_reply_wipe (struct afr_reply *reply) +afr_reply_wipe(struct afr_reply *reply) { - if (reply->xdata) { - dict_unref (reply->xdata); - reply->xdata = NULL; - } + if (reply->xdata) { + dict_unref(reply->xdata); + reply->xdata = NULL; + } - if (reply->xattr) { - dict_unref (reply->xattr); - reply->xattr = NULL; - } + if (reply->xattr) { + dict_unref(reply->xattr); + reply->xattr = NULL; + } } void -afr_replies_wipe (struct afr_reply *replies, int count) +afr_replies_wipe(struct afr_reply *replies, int count) { - int i = 0; + int i = 0; - for (i = 0; i < count; i++) { - afr_reply_wipe (&replies[i]); - } + for (i = 0; i < count; i++) { + afr_reply_wipe(&replies[i]); + } } void -afr_local_replies_wipe (afr_local_t *local, afr_private_t *priv) +afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv) { + if (!local->replies) + return; - if (!local->replies) - return; - - afr_replies_wipe (local->replies, priv->child_count); + afr_replies_wipe(local->replies, priv->child_count); - memset (local->replies, 0, sizeof(*local->replies) * priv->child_count); + memset(local->replies, 0, sizeof(*local->replies) * priv->child_count); } static gf_boolean_t -afr_fop_lock_is_unlock (call_frame_t *frame) +afr_fop_lock_is_unlock(call_frame_t *frame) { - afr_local_t *local = frame->local; - switch (local->op) { + afr_local_t *local = frame->local; + switch (local->op) { case GF_FOP_INODELK: case GF_FOP_FINODELK: - if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) && - (local->cont.inodelk.in_cmd == F_SETLKW || - local->cont.inodelk.in_cmd == F_SETLK)) - return _gf_true; - break; + if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) && + (local->cont.inodelk.in_cmd == F_SETLKW || + local->cont.inodelk.in_cmd == F_SETLK)) + return _gf_true; + break; case GF_FOP_ENTRYLK: case GF_FOP_FENTRYLK: - if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd) - return _gf_true; - break; + if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd) + return _gf_true; + break; default: - return _gf_false; - } - return _gf_false; + return 
_gf_false; + } + return _gf_false; } static gf_boolean_t -afr_lk_is_unlock (int32_t cmd, struct gf_flock *flock) +afr_lk_is_unlock(int32_t cmd, struct gf_flock *flock) { - switch (cmd) { + switch (cmd) { case F_RESLK_UNLCK: - return _gf_true; - break; + return _gf_true; + break; #if F_SETLKW != F_SETLKW64 case F_SETLKW64: @@ -1953,571 +1898,565 @@ afr_lk_is_unlock (int32_t cmd, struct gf_flock *flock) case F_SETLK64: #endif case F_SETLK: - if (F_UNLCK == flock->l_type) - return _gf_true; - break; + if (F_UNLCK == flock->l_type) + return _gf_true; + break; default: - return _gf_false; - } - return _gf_false; + return _gf_false; + } + return _gf_false; } void -afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret, - int32_t *op_errno) +afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret, + int32_t *op_errno) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; - if (!frame || !frame->this || !frame->local || !frame->this->private) - return; + if (!frame || !frame->this || !frame->local || !frame->this->private) + return; - if (*op_ret < 0) - return; + if (*op_ret < 0) + return; - /* Failing inodelk/entrylk/lk here is not a good idea because we - * need to cleanup the locks on the other bricks if we choose to fail - * the fop here. The brick may go down just after unwind happens as well - * so anyways the fop will fail when the next fop is sent so leaving - * it like this for now.*/ - local = frame->local; - switch (local->op) { + /* Failing inodelk/entrylk/lk here is not a good idea because we + * need to cleanup the locks on the other bricks if we choose to fail + * the fop here. The brick may go down just after unwind happens as well + * so anyways the fop will fail when the next fop is sent so leaving + * it like this for now.*/ + local = frame->local; + switch (local->op) { case GF_FOP_LOOKUP: case GF_FOP_INODELK: case GF_FOP_FINODELK: case GF_FOP_ENTRYLK: case GF_FOP_FENTRYLK: case GF_FOP_LK: - return; + return; default: - break; - } + break; + } - priv = frame->this->private; - if (!priv->consistent_io) - return; + priv = frame->this->private; + if (!priv->consistent_io) + return; - if (local->event_generation && - (local->event_generation != priv->event_generation)) - goto inconsistent; + if (local->event_generation && + (local->event_generation != priv->event_generation)) + goto inconsistent; - return; + return; inconsistent: - *op_ret = -1; - *op_errno = ENOTCONN; + *op_ret = -1; + *op_errno = ENOTCONN; } void -afr_local_cleanup (afr_local_t *local, xlator_t *this) +afr_local_cleanup(afr_local_t *local, xlator_t *this) { - afr_private_t * priv = NULL; - - if (!local) - return; - - syncbarrier_destroy (&local->barrier); + afr_private_t *priv = NULL; - afr_local_transaction_cleanup (local, this); - - priv = this->private; - - loc_wipe (&local->loc); - loc_wipe (&local->newloc); + if (!local) + return; - if (local->fd) - fd_unref (local->fd); + syncbarrier_destroy(&local->barrier); - if (local->xattr_req) - dict_unref (local->xattr_req); + afr_local_transaction_cleanup(local, this); - if (local->xattr_rsp) - dict_unref (local->xattr_rsp); + priv = this->private; - if (local->dict) - dict_unref (local->dict); + loc_wipe(&local->loc); + loc_wipe(&local->newloc); - afr_local_replies_wipe (local, priv); - GF_FREE(local->replies); + if (local->fd) + fd_unref(local->fd); - GF_FREE (local->child_up); + if (local->xattr_req) + dict_unref(local->xattr_req); - GF_FREE (local->read_attempted); + if 
(local->xattr_rsp) + dict_unref(local->xattr_rsp); - GF_FREE (local->readable); - GF_FREE (local->readable2); + if (local->dict) + dict_unref(local->dict); - if (local->inode) - inode_unref (local->inode); + afr_local_replies_wipe(local, priv); + GF_FREE(local->replies); - if (local->parent) - inode_unref (local->parent); + GF_FREE(local->child_up); - if (local->parent2) - inode_unref (local->parent2); + GF_FREE(local->read_attempted); - if (local->refreshinode) - inode_unref (local->refreshinode); + GF_FREE(local->readable); + GF_FREE(local->readable2); - { /* getxattr */ - GF_FREE (local->cont.getxattr.name); - } + if (local->inode) + inode_unref(local->inode); - { /* lk */ - GF_FREE (local->cont.lk.locked_nodes); - } + if (local->parent) + inode_unref(local->parent); - { /* create */ - if (local->cont.create.fd) - fd_unref (local->cont.create.fd); - if (local->cont.create.params) - dict_unref (local->cont.create.params); - } + if (local->parent2) + inode_unref(local->parent2); - { /* mknod */ - if (local->cont.mknod.params) - dict_unref (local->cont.mknod.params); - } + if (local->refreshinode) + inode_unref(local->refreshinode); - { /* mkdir */ - if (local->cont.mkdir.params) - dict_unref (local->cont.mkdir.params); - } + { /* getxattr */ + GF_FREE(local->cont.getxattr.name); + } - { /* symlink */ - if (local->cont.symlink.params) - dict_unref (local->cont.symlink.params); - } + { /* lk */ + GF_FREE(local->cont.lk.locked_nodes); + } - { /* writev */ - GF_FREE (local->cont.writev.vector); - if (local->cont.writev.iobref) - iobref_unref (local->cont.writev.iobref); - } + { /* create */ + if (local->cont.create.fd) + fd_unref(local->cont.create.fd); + if (local->cont.create.params) + dict_unref(local->cont.create.params); + } - { /* setxattr */ - if (local->cont.setxattr.dict) - dict_unref (local->cont.setxattr.dict); - } + { /* mknod */ + if (local->cont.mknod.params) + dict_unref(local->cont.mknod.params); + } - { /* fsetxattr */ - if (local->cont.fsetxattr.dict) - dict_unref (local->cont.fsetxattr.dict); - } + { /* mkdir */ + if (local->cont.mkdir.params) + dict_unref(local->cont.mkdir.params); + } - { /* removexattr */ - GF_FREE (local->cont.removexattr.name); - } - { /* xattrop */ - if (local->cont.xattrop.xattr) - dict_unref (local->cont.xattrop.xattr); - } - { /* symlink */ - GF_FREE (local->cont.symlink.linkpath); - } + { /* symlink */ + if (local->cont.symlink.params) + dict_unref(local->cont.symlink.params); + } - { /* opendir */ - GF_FREE (local->cont.opendir.checksum); - } + { /* writev */ + GF_FREE(local->cont.writev.vector); + if (local->cont.writev.iobref) + iobref_unref(local->cont.writev.iobref); + } - { /* open */ - if (local->cont.open.fd) - fd_unref (local->cont.open.fd); - } + { /* setxattr */ + if (local->cont.setxattr.dict) + dict_unref(local->cont.setxattr.dict); + } - { /* readdirp */ - if (local->cont.readdir.dict) - dict_unref (local->cont.readdir.dict); - } + { /* fsetxattr */ + if (local->cont.fsetxattr.dict) + dict_unref(local->cont.fsetxattr.dict); + } - { /* inodelk */ - GF_FREE (local->cont.inodelk.volume); - if (local->cont.inodelk.xdata) - dict_unref (local->cont.inodelk.xdata); - } + { /* removexattr */ + GF_FREE(local->cont.removexattr.name); + } + { /* xattrop */ + if (local->cont.xattrop.xattr) + dict_unref(local->cont.xattrop.xattr); + } + { /* symlink */ + GF_FREE(local->cont.symlink.linkpath); + } - { /* entrylk */ - GF_FREE (local->cont.entrylk.volume); - GF_FREE (local->cont.entrylk.basename); - if (local->cont.entrylk.xdata) - dict_unref 
(local->cont.entrylk.xdata); - } + { /* opendir */ + GF_FREE(local->cont.opendir.checksum); + } - if (local->xdata_req) - dict_unref (local->xdata_req); + { /* open */ + if (local->cont.open.fd) + fd_unref(local->cont.open.fd); + } - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); + { /* readdirp */ + if (local->cont.readdir.dict) + dict_unref(local->cont.readdir.dict); + } + + { /* inodelk */ + GF_FREE(local->cont.inodelk.volume); + if (local->cont.inodelk.xdata) + dict_unref(local->cont.inodelk.xdata); + } + + { /* entrylk */ + GF_FREE(local->cont.entrylk.volume); + GF_FREE(local->cont.entrylk.basename); + if (local->cont.entrylk.xdata) + dict_unref(local->cont.entrylk.xdata); + } + + if (local->xdata_req) + dict_unref(local->xdata_req); + + if (local->xdata_rsp) + dict_unref(local->xdata_rsp); } - int -afr_frame_return (call_frame_t *frame) +afr_frame_return(call_frame_t *frame) { - afr_local_t *local = NULL; - int call_count = 0; + afr_local_t *local = NULL; + int call_count = 0; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - call_count = --local->call_count; - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + call_count = --local->call_count; + } + UNLOCK(&frame->lock); - return call_count; + return call_count; } -static char *afr_ignore_xattrs[] = { - GF_SELINUX_XATTR_KEY, - QUOTA_SIZE_KEY, - NULL -}; +static char *afr_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL}; gf_boolean_t -afr_is_xattr_ignorable (char *key) +afr_is_xattr_ignorable(char *key) { - int i = 0; + int i = 0; - if (!strncmp (key, AFR_XATTR_PREFIX, SLEN (AFR_XATTR_PREFIX))) - return _gf_true; - for (i = 0; afr_ignore_xattrs[i]; i++) { - if (!strcmp (key, afr_ignore_xattrs[i])) - return _gf_true; - } - return _gf_false; + if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) + return _gf_true; + for (i = 0; afr_ignore_xattrs[i]; i++) { + if (!strcmp(key, afr_ignore_xattrs[i])) + return _gf_true; + } + return _gf_false; } static gf_boolean_t -afr_xattr_match_needed (dict_t *this, char *key1, data_t *value1, void *data) +afr_xattr_match_needed(dict_t *this, char *key1, data_t *value1, void *data) { - /* Ignore all non-disk (i.e. virtual) xattrs right away. */ - if (!gf_is_valid_xattr_namespace (key1)) - return _gf_false; + /* Ignore all non-disk (i.e. virtual) xattrs right away. */ + if (!gf_is_valid_xattr_namespace(key1)) + return _gf_false; - /* Ignore on-disk xattrs that AFR doesn't need to heal. */ - if (!afr_is_xattr_ignorable (key1)) - return _gf_true; + /* Ignore on-disk xattrs that AFR doesn't need to heal. 
*/ + if (!afr_is_xattr_ignorable(key1)) + return _gf_true; - return _gf_false; + return _gf_false; } gf_boolean_t -afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2) +afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2) { - return are_dicts_equal (dict1, dict2, afr_xattr_match_needed, NULL); + return are_dicts_equal(dict1, dict2, afr_xattr_match_needed, NULL); } static int -afr_get_parent_read_subvol (xlator_t *this, inode_t *parent, - struct afr_reply *replies, unsigned char *readable) +afr_get_parent_read_subvol(xlator_t *this, inode_t *parent, + struct afr_reply *replies, unsigned char *readable) { - int i = 0; - int par_read_subvol = -1; - int par_read_subvol_iter = -1; - afr_private_t *priv = NULL; - - priv = this->private; + int i = 0; + int par_read_subvol = -1; + int par_read_subvol_iter = -1; + afr_private_t *priv = NULL; - if (parent) - par_read_subvol = afr_data_subvol_get (parent, this, NULL, NULL, - NULL, NULL); - - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; + priv = this->private; - if (replies[i].op_ret < 0) - continue; + if (parent) + par_read_subvol = afr_data_subvol_get(parent, this, NULL, NULL, NULL, + NULL); - if (par_read_subvol_iter == -1) { - par_read_subvol_iter = i; - continue; - } + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; - if ((par_read_subvol_iter != par_read_subvol) && readable[i]) - par_read_subvol_iter = i; + if (replies[i].op_ret < 0) + continue; - if (i == par_read_subvol) - par_read_subvol_iter = i; + if (par_read_subvol_iter == -1) { + par_read_subvol_iter = i; + continue; } - /* At the end of the for-loop, the only reason why @par_read_subvol_iter - * could be -1 is when this LOOKUP has failed on all sub-volumes. - * So it is okay to send an arbitrary subvolume (0 in this case) - * as parent read subvol. - */ - if (par_read_subvol_iter == -1) - par_read_subvol_iter = 0; - return par_read_subvol_iter; + if ((par_read_subvol_iter != par_read_subvol) && readable[i]) + par_read_subvol_iter = i; + if (i == par_read_subvol) + par_read_subvol_iter = i; + } + /* At the end of the for-loop, the only reason why @par_read_subvol_iter + * could be -1 is when this LOOKUP has failed on all sub-volumes. + * So it is okay to send an arbitrary subvolume (0 in this case) + * as parent read subvol. 
+ */ + if (par_read_subvol_iter == -1) + par_read_subvol_iter = 0; + + return par_read_subvol_iter; } int -afr_read_subvol_decide (inode_t *inode, xlator_t *this, - afr_read_subvol_args_t *args, unsigned char *readable) +afr_read_subvol_decide(inode_t *inode, xlator_t *this, + afr_read_subvol_args_t *args, unsigned char *readable) { - int event = 0; - afr_private_t *priv = NULL; - unsigned char *intersection = NULL; + int event = 0; + afr_private_t *priv = NULL; + unsigned char *intersection = NULL; - priv = this->private; - intersection = alloca0 (priv->child_count); + priv = this->private; + intersection = alloca0(priv->child_count); - afr_readables_intersect_get (inode, this, &event, intersection); + afr_readables_intersect_get(inode, this, &event, intersection); - if (AFR_COUNT (intersection, priv->child_count) <= 0) { - /* TODO: If we have one brick with valid data_readable and - * another with metadata_readable, try to send an iatt with - * valid bits from both.*/ - return -1; - } + if (AFR_COUNT(intersection, priv->child_count) <= 0) { + /* TODO: If we have one brick with valid data_readable and + * another with metadata_readable, try to send an iatt with + * valid bits from both.*/ + return -1; + } - memcpy (readable, intersection, sizeof (*readable) * priv->child_count); + memcpy(readable, intersection, sizeof(*readable) * priv->child_count); - return afr_read_subvol_select_by_policy (inode, this, intersection, - args); + return afr_read_subvol_select_by_policy(inode, this, intersection, args); } static inline int -afr_first_up_child (call_frame_t *frame, xlator_t *this) +afr_first_up_child(call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - for (i = 0; i < priv->child_count; i++) - if (local->replies[i].valid && - local->replies[i].op_ret == 0) - return i; - return -1; + for (i = 0; i < priv->child_count; i++) + if (local->replies[i].valid && local->replies[i].op_ret == 0) + return i; + return -1; } static void -afr_attempt_readsubvol_set (call_frame_t *frame, xlator_t *this, - unsigned char *success_replies, - unsigned char *data_readable, int *read_subvol) +afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, + unsigned char *success_replies, + unsigned char *data_readable, int *read_subvol) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int spb_choice = -1; - int child_count = -1; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int spb_choice = -1; + int child_count = -1; - if (*read_subvol != -1) - return; + if (*read_subvol != -1) + return; - priv = this->private; - local = frame->local; - child_count = priv->child_count; - - afr_inode_split_brain_choice_get (local->inode, this, - &spb_choice); - if ((spb_choice >= 0) && - (AFR_COUNT(success_replies, child_count) == child_count)) { - *read_subvol = spb_choice; - } else if (!priv->quorum_count) { - *read_subvol = afr_first_up_child (frame, this); - } else if (priv->quorum_count && - afr_has_quorum (data_readable, this)) { - /* read_subvol is guaranteed to be valid if we hit this path. */ - *read_subvol = afr_first_up_child (frame, this); - } else { - /* If quorum is enabled and we do not have a - readable yet, it means all good copies are down. 
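afr_readables_intersect_get() and afr_read_subvol_decide() above work on the element-wise AND of the data and metadata readability maps: a child is usable only when it is good for both, and an empty intersection means no single child can serve a trustworthy iatt. A sketch of that intersection-and-count step, assuming AFR_INTERSECT and AFR_COUNT have the obvious per-element semantics; intersect_readable() is a hypothetical helper:

static int
intersect_readable(const unsigned char *data_ok, const unsigned char *md_ok,
                   unsigned char *both_ok, int child_count)
{
    int i;
    int count = 0;

    for (i = 0; i < child_count; i++) {
        both_ok[i] = data_ok[i] && md_ok[i];
        count += both_ok[i];
    }
    /* 0 here is the "<= 0" case in afr_read_subvol_decide(): fall back
     * or fail rather than trust a partially readable child. */
    return count;
}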
- */ - local->op_ret = -1; - local->op_errno = ENOTCONN; - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_READ_SUBVOL_ERROR, "no read " - "subvols for %s", local->loc.path); - } - if (*read_subvol >= 0) - dict_del (local->replies[*read_subvol].xdata, GF_CONTENT_KEY); + priv = this->private; + local = frame->local; + child_count = priv->child_count; + + afr_inode_split_brain_choice_get(local->inode, this, &spb_choice); + if ((spb_choice >= 0) && + (AFR_COUNT(success_replies, child_count) == child_count)) { + *read_subvol = spb_choice; + } else if (!priv->quorum_count) { + *read_subvol = afr_first_up_child(frame, this); + } else if (priv->quorum_count && afr_has_quorum(data_readable, this)) { + /* read_subvol is guaranteed to be valid if we hit this path. */ + *read_subvol = afr_first_up_child(frame, this); + } else { + /* If quorum is enabled and we do not have a + readable yet, it means all good copies are down. + */ + local->op_ret = -1; + local->op_errno = ENOTCONN; + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR, + "no read " + "subvols for %s", + local->loc.path); + } + if (*read_subvol >= 0) + dict_del(local->replies[*read_subvol].xdata, GF_CONTENT_KEY); } static void -afr_lookup_done (call_frame_t *frame, xlator_t *this) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = -1; - int op_errno = 0; - int read_subvol = 0; - int par_read_subvol = 0; - int ret = -1; - unsigned char *readable = NULL; - unsigned char *success_replies = NULL; - int event = 0; - struct afr_reply *replies = NULL; - uuid_t read_gfid = {0, }; - gf_boolean_t locked_entry = _gf_false; - gf_boolean_t can_interpret = _gf_true; - inode_t *parent = NULL; - ia_type_t ia_type = IA_INVAL; - afr_read_subvol_args_t args = {0,}; - char *gfid_heal_msg = NULL; - - priv = this->private; - local = frame->local; - replies = local->replies; - parent = local->loc.parent; - - locked_entry = afr_is_possibly_under_txn (AFR_ENTRY_TRANSACTION, local, - this); - - readable = alloca0 (priv->child_count); - success_replies = alloca0 (priv->child_count); - - afr_inode_read_subvol_get (parent, this, readable, NULL, &event); - par_read_subvol = afr_get_parent_read_subvol (this, parent, replies, - readable); - - /* First, check if we have a gfid-change from somewhere, - If so, propagate that so that a fresh lookup can be - issued - */ - if (local->cont.lookup.needs_fresh_lookup) { - local->op_ret = -1; - local->op_errno = ESTALE; - goto error; +afr_lookup_done(call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = -1; + int op_errno = 0; + int read_subvol = 0; + int par_read_subvol = 0; + int ret = -1; + unsigned char *readable = NULL; + unsigned char *success_replies = NULL; + int event = 0; + struct afr_reply *replies = NULL; + uuid_t read_gfid = { + 0, + }; + gf_boolean_t locked_entry = _gf_false; + gf_boolean_t can_interpret = _gf_true; + inode_t *parent = NULL; + ia_type_t ia_type = IA_INVAL; + afr_read_subvol_args_t args = { + 0, + }; + char *gfid_heal_msg = NULL; + + priv = this->private; + local = frame->local; + replies = local->replies; + parent = local->loc.parent; + + locked_entry = afr_is_possibly_under_txn(AFR_ENTRY_TRANSACTION, local, + this); + + readable = alloca0(priv->child_count); + success_replies = alloca0(priv->child_count); + + afr_inode_read_subvol_get(parent, this, readable, NULL, &event); + par_read_subvol = afr_get_parent_read_subvol(this, parent, replies, + readable); + + /* First, check if we have a gfid-change from 
somewhere, + If so, propagate that so that a fresh lookup can be + issued + */ + if (local->cont.lookup.needs_fresh_lookup) { + local->op_ret = -1; + local->op_errno = ESTALE; + goto error; + } + + op_errno = afr_final_errno(frame->local, this->private); + local->op_errno = op_errno; + + read_subvol = -1; + afr_fill_success_replies(local, priv, success_replies); + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + + if (locked_entry && replies[i].op_ret == -1 && + replies[i].op_errno == ENOENT) { + /* Second, check entry is still + "underway" in creation */ + local->op_ret = -1; + local->op_errno = ENOENT; + goto error; } - op_errno = afr_final_errno (frame->local, this->private); - local->op_errno = op_errno; + if (replies[i].op_ret == -1) + continue; - read_subvol = -1; - afr_fill_success_replies (local, priv, success_replies); + if (read_subvol == -1 || !readable[read_subvol]) { + read_subvol = i; + gf_uuid_copy(read_gfid, replies[i].poststat.ia_gfid); + ia_type = replies[i].poststat.ia_type; + local->op_ret = 0; + } + } - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; + if (read_subvol == -1) + goto error; + /* We now have a read_subvol, which is readable[] (if there + were any). Next we look for GFID mismatches. We don't + consider a GFID mismatch as an error if read_subvol is + readable[] but the mismatching GFID subvol is not. + */ + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) { + continue; + } - if (locked_entry && replies[i].op_ret == -1 && - replies[i].op_errno == ENOENT) { - /* Second, check entry is still - "underway" in creation */ - local->op_ret = -1; - local->op_errno = ENOENT; - goto error; - } + if (!gf_uuid_compare(replies[i].poststat.ia_gfid, read_gfid)) + continue; - if (replies[i].op_ret == -1) - continue; + can_interpret = _gf_false; - if (read_subvol == -1 || !readable[read_subvol]) { - read_subvol = i; - gf_uuid_copy (read_gfid, replies[i].poststat.ia_gfid); - ia_type = replies[i].poststat.ia_type; - local->op_ret = 0; - } - } + if (locked_entry) + continue; - if (read_subvol == -1) - goto error; - /* We now have a read_subvol, which is readable[] (if there - were any). Next we look for GFID mismatches. We don't - consider a GFID mismatch as an error if read_subvol is - readable[] but the mismatching GFID subvol is not. - */ - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) { - continue; - } - - if (!gf_uuid_compare (replies[i].poststat.ia_gfid, read_gfid)) - continue; - - can_interpret = _gf_false; - - if (locked_entry) - continue; - - /* Now GFIDs mismatch. It's OK as long as this subvol - is not readable[] but read_subvol is */ - if (readable[read_subvol] && !readable[i]) - continue; - - /* If we were called from glfsheal and there is still a gfid - * mismatch, succeed the lookup and let glfsheal print the - * response via gfid-heal-msg.*/ - if (!dict_get_str (local->xattr_req, "gfid-heal-msg", - &gfid_heal_msg)) - goto cant_interpret; - - /* LOG ERROR */ - local->op_ret = -1; - local->op_errno = EIO; - goto error; - } - - /* Forth, for the finalized GFID, pick the best subvolume - to return stats from. 
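The loop above tolerates a GFID mismatch during lookup in only two cases: the entry may still be under creation (locked_entry), or the disagreeing child is not a readable copy while the chosen read child is. Anything else is treated as split brain and fails with EIO, unless glfsheal requested a gfid-heal-msg. The rule, restated over hypothetical boolean parameters in gfid_mismatch_is_fatal():

/* Returns nonzero when a GFID mismatch on one child must fail the lookup. */
static int
gfid_mismatch_is_fatal(int entry_possibly_under_creation,
                       int read_child_is_readable, int this_child_is_readable)
{
    if (entry_possibly_under_creation)
        return 0; /* creation still in flight; not split brain yet */

    if (read_child_is_readable && !this_child_is_readable)
        return 0; /* stale copy on a non-readable child is heal-able */

    return 1; /* genuine disagreement: surface EIO to the application */
}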
- */ - read_subvol = -1; - memset (readable, 0, sizeof (*readable) * priv->child_count); - if (can_interpret) { - if (!afr_has_quorum (success_replies, this)) - goto cant_interpret; - /* It is safe to call afr_replies_interpret() because we have - a response from all the UP subvolumes and all of them resolved - to the same GFID - */ - gf_uuid_copy (args.gfid, read_gfid); - args.ia_type = ia_type; - ret = afr_replies_interpret (frame, this, local->inode, NULL); - read_subvol = afr_read_subvol_decide (local->inode, this, &args, - readable); - if (read_subvol == -1) - goto cant_interpret; - if (ret) { - afr_inode_event_gen_reset (local->inode, this); - dict_del (local->replies[read_subvol].xdata, - GF_CONTENT_KEY); - } - } else { - cant_interpret: - afr_attempt_readsubvol_set (frame, this, success_replies, - readable, &read_subvol); - if (read_subvol == -1) { - goto error; - } - } + /* Now GFIDs mismatch. It's OK as long as this subvol + is not readable[] but read_subvol is */ + if (readable[read_subvol] && !readable[i]) + continue; - afr_handle_quota_size (frame, this); + /* If we were called from glfsheal and there is still a gfid + * mismatch, succeed the lookup and let glfsheal print the + * response via gfid-heal-msg.*/ + if (!dict_get_str(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg)) + goto cant_interpret; - afr_set_need_heal (this, local); - if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) { - local->op_ret = -1; - local->op_errno = ENOTCONN; - gf_msg_debug(this->name, 0, "Arbiter cannot be a read subvol " - "for %s", local->loc.path); - goto error; + /* LOG ERROR */ + local->op_ret = -1; + local->op_errno = EIO; + goto error; + } + + /* Forth, for the finalized GFID, pick the best subvolume + to return stats from. + */ + read_subvol = -1; + memset(readable, 0, sizeof(*readable) * priv->child_count); + if (can_interpret) { + if (!afr_has_quorum(success_replies, this)) + goto cant_interpret; + /* It is safe to call afr_replies_interpret() because we have + a response from all the UP subvolumes and all of them resolved + to the same GFID + */ + gf_uuid_copy(args.gfid, read_gfid); + args.ia_type = ia_type; + ret = afr_replies_interpret(frame, this, local->inode, NULL); + read_subvol = afr_read_subvol_decide(local->inode, this, &args, + readable); + if (read_subvol == -1) + goto cant_interpret; + if (ret) { + afr_inode_event_gen_reset(local->inode, this); + dict_del(local->replies[read_subvol].xdata, GF_CONTENT_KEY); } + } else { + cant_interpret: + afr_attempt_readsubvol_set(frame, this, success_replies, readable, + &read_subvol); + if (read_subvol == -1) { + goto error; + } + } - ret = dict_get_str (local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); - if (!ret) { - ret = dict_set_str (local->replies[read_subvol].xdata, - "gfid-heal-msg", gfid_heal_msg); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, - "Error setting gfid-heal-msg dict"); - local->op_ret = -1; - local->op_errno = ENOMEM; - } + afr_handle_quota_size(frame, this); + + afr_set_need_heal(this, local); + if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) { + local->op_ret = -1; + local->op_errno = ENOTCONN; + gf_msg_debug(this->name, 0, + "Arbiter cannot be a read subvol " + "for %s", + local->loc.path); + goto error; + } + + ret = dict_get_str(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); + if (!ret) { + ret = dict_set_str(local->replies[read_subvol].xdata, "gfid-heal-msg", + gfid_heal_msg); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 
AFR_MSG_DICT_SET_FAILED, + "Error setting gfid-heal-msg dict"); + local->op_ret = -1; + local->op_errno = ENOMEM; } + } - AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->replies[read_subvol].poststat, - local->replies[read_subvol].xdata, - &local->replies[par_read_subvol].postparent); - return; + AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->replies[read_subvol].poststat, + local->replies[read_subvol].xdata, + &local->replies[par_read_subvol].postparent); + return; error: - AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, NULL, - NULL, NULL, NULL); + AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL, + NULL, NULL); } /* @@ -2529,843 +2468,834 @@ error: */ int -afr_higher_errno (int32_t old_errno, int32_t new_errno) +afr_higher_errno(int32_t old_errno, int32_t new_errno) { - if (old_errno == ENODATA || new_errno == ENODATA) - return ENODATA; - if (old_errno == ENOENT || new_errno == ENOENT) - return ENOENT; - if (old_errno == ESTALE || new_errno == ESTALE) - return ESTALE; + if (old_errno == ENODATA || new_errno == ENODATA) + return ENODATA; + if (old_errno == ENOENT || new_errno == ENOENT) + return ENOENT; + if (old_errno == ESTALE || new_errno == ESTALE) + return ESTALE; - return new_errno; + return new_errno; } - int -afr_final_errno (afr_local_t *local, afr_private_t *priv) +afr_final_errno(afr_local_t *local, afr_private_t *priv) { - int i = 0; - int op_errno = 0; - int tmp_errno = 0; + int i = 0; + int op_errno = 0; + int tmp_errno = 0; - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret >= 0) - continue; - tmp_errno = local->replies[i].op_errno; - op_errno = afr_higher_errno (op_errno, tmp_errno); - } + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret >= 0) + continue; + tmp_errno = local->replies[i].op_errno; + op_errno = afr_higher_errno(op_errno, tmp_errno); + } - return op_errno; + return op_errno; } static int32_t -afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - int ret = 0; - char *pathinfo = NULL; - gf_boolean_t is_local = _gf_false; - afr_private_t *priv = NULL; - int32_t child_index = -1; - - if (op_ret != 0) { - goto out; - } - - priv = this->private; - child_index = (int32_t)(long)cookie; - - ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo); - if (ret != 0) { - goto out; - } - - ret = glusterfs_is_local_pathinfo (pathinfo, &is_local); - if (ret) { - goto out; - } - - /* - * Note that one local subvolume will override another here. The only - * way to avoid that would be to retain extra information about whether - * the previous read_child is local, and it's just not worth it. Even - * the slowest local subvolume is far preferable to a remote one. - */ - if (is_local) { - priv->local[child_index] = 1; - /* Don't set arbiter as read child. 
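afr_higher_errno() and afr_final_errno() above collapse the per-brick failures into a single errno with a fixed precedence: ENODATA beats ENOENT, which beats ESTALE, and any other errno only survives when none of those three was seen (in which case the newest one wins). An equivalent fold with errno_rank() and fold_errno() as hypothetical helper names:

#include <errno.h>

static int
errno_rank(int err)
{
    switch (err) {
    case ENODATA:
        return 3;
    case ENOENT:
        return 2;
    case ESTALE:
        return 1;
    default:
        return 0;
    }
}

static int
fold_errno(int so_far, int incoming)
{
    /* Mirrors afr_higher_errno(): keep the higher-ranked error if one
     * was already seen, otherwise prefer the newest errno. */
    return (errno_rank(so_far) > errno_rank(incoming)) ? so_far : incoming;
}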
*/ - if (AFR_IS_ARBITER_BRICK(priv, child_index)) - goto out; - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_LOCAL_CHILD, "selecting local read_child %s", - priv->children[child_index]->name); - - priv->read_child = child_index; - } +afr_local_discovery_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int ret = 0; + char *pathinfo = NULL; + gf_boolean_t is_local = _gf_false; + afr_private_t *priv = NULL; + int32_t child_index = -1; + + if (op_ret != 0) { + goto out; + } + + priv = this->private; + child_index = (int32_t)(long)cookie; + + ret = dict_get_str(dict, GF_XATTR_PATHINFO_KEY, &pathinfo); + if (ret != 0) { + goto out; + } + + ret = glusterfs_is_local_pathinfo(pathinfo, &is_local); + if (ret) { + goto out; + } + + /* + * Note that one local subvolume will override another here. The only + * way to avoid that would be to retain extra information about whether + * the previous read_child is local, and it's just not worth it. Even + * the slowest local subvolume is far preferable to a remote one. + */ + if (is_local) { + priv->local[child_index] = 1; + /* Don't set arbiter as read child. */ + if (AFR_IS_ARBITER_BRICK(priv, child_index)) + goto out; + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_LOCAL_CHILD, + "selecting local read_child %s", + priv->children[child_index]->name); + + priv->read_child = child_index; + } out: - STACK_DESTROY(frame->root); - return 0; + STACK_DESTROY(frame->root); + return 0; } static void -afr_attempt_local_discovery (xlator_t *this, int32_t child_index) +afr_attempt_local_discovery(xlator_t *this, int32_t child_index) { - call_frame_t *newframe = NULL; - loc_t tmploc = {0,}; - afr_private_t *priv = this->private; + call_frame_t *newframe = NULL; + loc_t tmploc = { + 0, + }; + afr_private_t *priv = this->private; - newframe = create_frame(this,this->ctx->pool); - if (!newframe) { - return; - } + newframe = create_frame(this, this->ctx->pool); + if (!newframe) { + return; + } - tmploc.gfid[sizeof(tmploc.gfid)-1] = 1; - STACK_WIND_COOKIE (newframe, afr_local_discovery_cbk, - (void *)(long)child_index, - priv->children[child_index], - priv->children[child_index]->fops->getxattr, - &tmploc, GF_XATTR_PATHINFO_KEY, NULL); + tmploc.gfid[sizeof(tmploc.gfid) - 1] = 1; + STACK_WIND_COOKIE(newframe, afr_local_discovery_cbk, + (void *)(long)child_index, priv->children[child_index], + priv->children[child_index]->fops->getxattr, &tmploc, + GF_XATTR_PATHINFO_KEY, NULL); } int -afr_lookup_sh_metadata_wrap (void *opaque) -{ - call_frame_t *frame = opaque; - afr_local_t *local = NULL; - xlator_t *this = NULL; - inode_t *inode = NULL; - afr_private_t *priv = NULL; - struct afr_reply *replies = NULL; - int i= 0, first = -1; - int ret = -1; - dict_t *dict = NULL; - - local = frame->local; - this = frame->this; - priv = this->private; - replies = local->replies; - - for (i =0; i < priv->child_count; i++) { - if(!replies[i].valid || replies[i].op_ret == -1) - continue; - first = i; - break; - } - if (first == -1) - goto out; - - if (afr_selfheal_metadata_by_stbuf (this, &replies[first].poststat)) - goto out; - - afr_local_replies_wipe (local, this->private); - - dict = dict_new (); - if (!dict) - goto out; - ret = dict_set_str (dict, "link-count", GF_XATTROP_INDEX_COUNT); - if (ret) { - gf_msg_debug (this->name, -ret, - "Unable to set link-count in dict "); - } - - if (loc_is_nameless (&local->loc)) { - ret = afr_selfheal_unlocked_discover_on (frame, local->inode, - local->loc.gfid, - local->replies, 
- local->child_up); - } else { - inode = afr_selfheal_unlocked_lookup_on (frame, - local->loc.parent, - local->loc.name, - local->replies, - local->child_up, dict); - } - if (inode) - inode_unref (inode); +afr_lookup_sh_metadata_wrap(void *opaque) +{ + call_frame_t *frame = opaque; + afr_local_t *local = NULL; + xlator_t *this = NULL; + inode_t *inode = NULL; + afr_private_t *priv = NULL; + struct afr_reply *replies = NULL; + int i = 0, first = -1; + int ret = -1; + dict_t *dict = NULL; + + local = frame->local; + this = frame->this; + priv = this->private; + replies = local->replies; + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + first = i; + break; + } + if (first == -1) + goto out; + + if (afr_selfheal_metadata_by_stbuf(this, &replies[first].poststat)) + goto out; + + afr_local_replies_wipe(local, this->private); + + dict = dict_new(); + if (!dict) + goto out; + ret = dict_set_str(dict, "link-count", GF_XATTROP_INDEX_COUNT); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to set link-count in dict "); + } + + if (loc_is_nameless(&local->loc)) { + ret = afr_selfheal_unlocked_discover_on(frame, local->inode, + local->loc.gfid, local->replies, + local->child_up); + } else { + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, + local->child_up, dict); + } + if (inode) + inode_unref(inode); out: - if (loc_is_nameless (&local->loc)) - afr_discover_done (frame, this); - else - afr_lookup_done (frame, this); + if (loc_is_nameless(&local->loc)) + afr_discover_done(frame, this); + else + afr_lookup_done(frame, this); - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return 0; + return 0; } gf_boolean_t -afr_is_pending_set (xlator_t *this, dict_t *xdata, int type) +afr_is_pending_set(xlator_t *this, dict_t *xdata, int type) { - int idx = -1; - afr_private_t *priv = NULL; - void *pending_raw = NULL; - int *pending_int = NULL; - int i = 0; + int idx = -1; + afr_private_t *priv = NULL; + void *pending_raw = NULL; + int *pending_int = NULL; + int i = 0; - priv = this->private; - idx = afr_index_for_transaction_type (type); + priv = this->private; + idx = afr_index_for_transaction_type(type); - if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) { - if (pending_raw) { - pending_int = pending_raw; + if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw) == 0) { + if (pending_raw) { + pending_int = pending_raw; - if (ntoh32 (pending_int[idx])) - return _gf_true; - } + if (ntoh32(pending_int[idx])) + return _gf_true; } + } - for (i = 0; i < priv->child_count; i++) { - if (dict_get_ptr (xdata, priv->pending_key[i], - &pending_raw)) - continue; - if (!pending_raw) - continue; - pending_int = pending_raw; - - if (ntoh32 (pending_int[idx])) - return _gf_true; - } + for (i = 0; i < priv->child_count; i++) { + if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw)) + continue; + if (!pending_raw) + continue; + pending_int = pending_raw; - return _gf_false; + if (ntoh32(pending_int[idx])) + return _gf_true; + } + + return _gf_false; } static gf_boolean_t afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - struct afr_reply *replies = NULL; - int i = 0, first = -1; - gf_boolean_t start = _gf_false; - struct iatt stbuf = {0, }; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + struct afr_reply *replies = NULL; + int i = 0, first = -1; + gf_boolean_t start = _gf_false; + struct iatt 
stbuf = { + 0, + }; - local = frame->local; - replies = local->replies; - priv = this->private; - - if (!priv->metadata_self_heal) - return _gf_false; + local = frame->local; + replies = local->replies; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if(!replies[i].valid || replies[i].op_ret == -1) - continue; - if (first == -1) { - first = i; - stbuf = replies[i].poststat; - continue; - } + if (!priv->metadata_self_heal) + return _gf_false; - if (afr_is_pending_set (this, replies[i].xdata, - AFR_METADATA_TRANSACTION)) { - /* Let shd do the heal so that lookup is not blocked - * on getting metadata lock/doing the heal */ - start = _gf_false; - break; - } + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (first == -1) { + first = i; + stbuf = replies[i].poststat; + continue; + } - if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) { - start = _gf_false; - break; - } - if (!IA_EQUAL (stbuf, replies[i].poststat, type)) { - start = _gf_false; - break; - } + if (afr_is_pending_set(this, replies[i].xdata, + AFR_METADATA_TRANSACTION)) { + /* Let shd do the heal so that lookup is not blocked + * on getting metadata lock/doing the heal */ + start = _gf_false; + break; + } - /*Check if iattrs need heal*/ - if ((!IA_EQUAL (stbuf, replies[i].poststat, uid)) || - (!IA_EQUAL (stbuf, replies[i].poststat, gid)) || - (!IA_EQUAL (stbuf, replies[i].poststat, prot))) { - start = _gf_true; - continue; - } + if (gf_uuid_compare(stbuf.ia_gfid, replies[i].poststat.ia_gfid)) { + start = _gf_false; + break; + } + if (!IA_EQUAL(stbuf, replies[i].poststat, type)) { + start = _gf_false; + break; + } - /*Check if xattrs need heal*/ - if (!afr_xattrs_are_equal (replies[first].xdata, - replies[i].xdata)) - start = _gf_true; + /*Check if iattrs need heal*/ + if ((!IA_EQUAL(stbuf, replies[i].poststat, uid)) || + (!IA_EQUAL(stbuf, replies[i].poststat, gid)) || + (!IA_EQUAL(stbuf, replies[i].poststat, prot))) { + start = _gf_true; + continue; } - return start; + /*Check if xattrs need heal*/ + if (!afr_xattrs_are_equal(replies[first].xdata, replies[i].xdata)) + start = _gf_true; + } + + return start; } int -afr_lookup_metadata_heal_check (call_frame_t *frame, xlator_t *this) +afr_lookup_metadata_heal_check(call_frame_t *frame, xlator_t *this) { - call_frame_t *heal = NULL; - afr_local_t *local = NULL; - int ret = 0; + call_frame_t *heal = NULL; + afr_local_t *local = NULL; + int ret = 0; - local = frame->local; - if (!afr_can_start_metadata_self_heal (frame, this)) - goto out; + local = frame->local; + if (!afr_can_start_metadata_self_heal(frame, this)) + goto out; - heal = afr_frame_create (this, &ret); - if (!heal) { - ret = -ret; - goto out; - } + heal = afr_frame_create(this, &ret); + if (!heal) { + ret = -ret; + goto out; + } - ret = synctask_new (this->ctx->env, afr_lookup_sh_metadata_wrap, - afr_refresh_selfheal_done, heal, frame); - if (ret) - goto out; - return ret; + ret = synctask_new(this->ctx->env, afr_lookup_sh_metadata_wrap, + afr_refresh_selfheal_done, heal, frame); + if (ret) + goto out; + return ret; out: - if (loc_is_nameless (&local->loc)) - afr_discover_done (frame, this); - else - afr_lookup_done (frame, this); - if (heal) - AFR_STACK_DESTROY (heal); - return ret; + if (loc_is_nameless(&local->loc)) + afr_discover_done(frame, this); + else + afr_lookup_done(frame, this); + if (heal) + AFR_STACK_DESTROY(heal); + return ret; } int -afr_lookup_selfheal_wrap (void *opaque) +afr_lookup_selfheal_wrap(void 
*opaque) { - int ret = 0; - call_frame_t *frame = opaque; - afr_local_t *local = NULL; - xlator_t *this = NULL; - inode_t *inode = NULL; - uuid_t pargfid = {0,}; + int ret = 0; + call_frame_t *frame = opaque; + afr_local_t *local = NULL; + xlator_t *this = NULL; + inode_t *inode = NULL; + uuid_t pargfid = { + 0, + }; - local = frame->local; - this = frame->this; - loc_pargfid (&local->loc, pargfid); + local = frame->local; + this = frame->this; + loc_pargfid(&local->loc, pargfid); - ret = afr_selfheal_name (frame->this, pargfid, local->loc.name, - &local->cont.lookup.gfid_req, local->xattr_req); - if (ret == -EIO) - goto unwind; + ret = afr_selfheal_name(frame->this, pargfid, local->loc.name, + &local->cont.lookup.gfid_req, local->xattr_req); + if (ret == -EIO) + goto unwind; - afr_local_replies_wipe (local, this->private); + afr_local_replies_wipe(local, this->private); - inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent, - local->loc.name, local->replies, - local->child_up, NULL); - if (inode) - inode_unref (inode); + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, + local->child_up, NULL); + if (inode) + inode_unref(inode); - afr_lookup_metadata_heal_check(frame, this); - return 0; + afr_lookup_metadata_heal_check(frame, this); + return 0; unwind: - AFR_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); + return 0; } int -afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - call_frame_t *heal = NULL; - int i = 0, first = -1; - gf_boolean_t name_state_mismatch = _gf_false; - struct afr_reply *replies = NULL; - int ret = 0; - unsigned char *par_readables = NULL; - unsigned char *success = NULL; - int32_t op_errno = 0; - uuid_t gfid = {0}; - - local = frame->local; - replies = local->replies; - priv = this->private; - par_readables = alloca0(priv->child_count); - success = alloca0(priv->child_count); - - ret = afr_inode_read_subvol_get (local->loc.parent, this, par_readables, - NULL, NULL); - if (ret < 0 || AFR_COUNT (par_readables, priv->child_count) == 0) { - /* In this case set par_readables to all 1 so that name_heal - * need checks at the end of this function will flag missing - * entry when name state mismatches*/ - memset (par_readables, 1, priv->child_count); +afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + call_frame_t *heal = NULL; + int i = 0, first = -1; + gf_boolean_t name_state_mismatch = _gf_false; + struct afr_reply *replies = NULL; + int ret = 0; + unsigned char *par_readables = NULL; + unsigned char *success = NULL; + int32_t op_errno = 0; + uuid_t gfid = {0}; + + local = frame->local; + replies = local->replies; + priv = this->private; + par_readables = alloca0(priv->child_count); + success = alloca0(priv->child_count); + + ret = afr_inode_read_subvol_get(local->loc.parent, this, par_readables, + NULL, NULL); + if (ret < 0 || AFR_COUNT(par_readables, priv->child_count) == 0) { + /* In this case set par_readables to all 1 so that name_heal + * need checks at the end of this function will flag missing + * entry when name state mismatches*/ + memset(par_readables, 1, priv->child_count); + } + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + + if (replies[i].op_ret == 0) { + if (uuid_is_null(gfid)) { + gf_uuid_copy(gfid, 
replies[i].poststat.ia_gfid); + } + success[i] = 1; + } else { + if ((replies[i].op_errno != ENOTCONN) && + (replies[i].op_errno != ENOENT) && + (replies[i].op_errno != ESTALE)) { + op_errno = replies[i].op_errno; + } } - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; - - if (replies[i].op_ret == 0) { - if (uuid_is_null (gfid)) { - gf_uuid_copy (gfid, - replies[i].poststat.ia_gfid); - } - success[i] = 1; - } else { - if ((replies[i].op_errno != ENOTCONN) && - (replies[i].op_errno != ENOENT) && - (replies[i].op_errno != ESTALE)) { - op_errno = replies[i].op_errno; - } - } + /*gfid is missing, needs heal*/ + if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) { + goto name_heal; + } - /*gfid is missing, needs heal*/ - if ((replies[i].op_ret == -1) && - (replies[i].op_errno == ENODATA)) { - goto name_heal; - } + if (first == -1) { + first = i; + continue; + } - if (first == -1) { - first = i; - continue; - } - - if (replies[i].op_ret != replies[first].op_ret) { - name_state_mismatch = _gf_true; - } - - if (replies[i].op_ret == 0) { - /* Rename after this lookup may succeed if we don't do - * a name-heal and the destination may not have pending xattrs - * to indicate which name is good and which is bad so always do - * this heal*/ - if (gf_uuid_compare (replies[i].poststat.ia_gfid, - gfid)) { - goto name_heal; - } - } - } - - if (name_state_mismatch) { - if (!priv->quorum_count) - goto name_heal; - if (!afr_has_quorum (success, this)) - goto name_heal; - if (op_errno) - goto name_heal; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; - if (par_readables[i] && replies[i].op_ret < 0 && - replies[i].op_errno != ENOTCONN) { - goto name_heal; - } - } + if (replies[i].op_ret != replies[first].op_ret) { + name_state_mismatch = _gf_true; } - goto metadata_heal; + if (replies[i].op_ret == 0) { + /* Rename after this lookup may succeed if we don't do + * a name-heal and the destination may not have pending xattrs + * to indicate which name is good and which is bad so always do + * this heal*/ + if (gf_uuid_compare(replies[i].poststat.ia_gfid, gfid)) { + goto name_heal; + } + } + } + + if (name_state_mismatch) { + if (!priv->quorum_count) + goto name_heal; + if (!afr_has_quorum(success, this)) + goto name_heal; + if (op_errno) + goto name_heal; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + if (par_readables[i] && replies[i].op_ret < 0 && + replies[i].op_errno != ENOTCONN) { + goto name_heal; + } + } + } + + goto metadata_heal; name_heal: - heal = afr_frame_create (this, NULL); - if (!heal) - goto metadata_heal; + heal = afr_frame_create(this, NULL); + if (!heal) + goto metadata_heal; - ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap, - afr_refresh_selfheal_done, heal, frame); - if (ret) { - AFR_STACK_DESTROY (heal); - goto metadata_heal; - } - return ret; + ret = synctask_new(this->ctx->env, afr_lookup_selfheal_wrap, + afr_refresh_selfheal_done, heal, frame); + if (ret) { + AFR_STACK_DESTROY(heal); + goto metadata_heal; + } + return ret; metadata_heal: - ret = afr_lookup_metadata_heal_check (frame, this); + ret = afr_lookup_metadata_heal_check(frame, this); - return ret; + return ret; } - int -afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, struct iatt *buf, - dict_t *xdata, struct iatt *postparent) -{ - afr_local_t * local = NULL; - int call_count = -1; - int child_index = -1; - GF_UNUSED int ret = 0; - int8_t 
need_heal = 1; - - child_index = (long) cookie; - - local = frame->local; - - local->replies[child_index].valid = 1; - local->replies[child_index].op_ret = op_ret; - local->replies[child_index].op_errno = op_errno; - /* - * On revalidate lookup if the gfid-changed, afr should unwind the fop - * with ESTALE so that a fresh lookup will be sent by the top xlator. - * So remember it. - */ - if (xdata && dict_get (xdata, "gfid-changed")) - local->cont.lookup.needs_fresh_lookup = _gf_true; +afr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + afr_local_t *local = NULL; + int call_count = -1; + int child_index = -1; + GF_UNUSED int ret = 0; + int8_t need_heal = 1; + + child_index = (long)cookie; + + local = frame->local; + + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + /* + * On revalidate lookup if the gfid-changed, afr should unwind the fop + * with ESTALE so that a fresh lookup will be sent by the top xlator. + * So remember it. + */ + if (xdata && dict_get(xdata, "gfid-changed")) + local->cont.lookup.needs_fresh_lookup = _gf_true; + + if (xdata) { + ret = dict_get_int8(xdata, "link-count", &need_heal); + local->replies[child_index].need_heal = need_heal; + } else { + local->replies[child_index].need_heal = need_heal; + } + if (op_ret != -1) { + local->replies[child_index].poststat = *buf; + local->replies[child_index].postparent = *postparent; + if (xdata) + local->replies[child_index].xdata = dict_ref(xdata); + } - if (xdata) { - ret = dict_get_int8 (xdata, "link-count", &need_heal); - local->replies[child_index].need_heal = need_heal; - } else { - local->replies[child_index].need_heal = need_heal; - } - if (op_ret != -1) { - local->replies[child_index].poststat = *buf; - local->replies[child_index].postparent = *postparent; - if (xdata) - local->replies[child_index].xdata = dict_ref (xdata); - } - - call_count = afr_frame_return (frame); - if (call_count == 0) { - afr_set_need_heal (this, local); - afr_lookup_entry_heal (frame, this); - } + call_count = afr_frame_return(frame); + if (call_count == 0) { + afr_set_need_heal(this, local); + afr_lookup_entry_heal(frame, this); + } - return 0; + return 0; } static void -afr_discover_unwind (call_frame_t *frame, xlator_t *this) +afr_discover_unwind(call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int op_errno = 0; - int read_subvol = -1; - unsigned char *data_readable = NULL; - unsigned char *success_replies = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int op_errno = 0; + int read_subvol = -1; + unsigned char *data_readable = NULL; + unsigned char *success_replies = NULL; - priv = this->private; - local = frame->local; - data_readable = alloca0 (priv->child_count); - success_replies = alloca0 (priv->child_count); + priv = this->private; + local = frame->local; + data_readable = alloca0(priv->child_count); + success_replies = alloca0(priv->child_count); - afr_fill_success_replies (local, priv, success_replies); - if (AFR_COUNT (success_replies, priv->child_count) > 0) - local->op_ret = 0; + afr_fill_success_replies(local, priv, success_replies); + if (AFR_COUNT(success_replies, priv->child_count) > 0) + local->op_ret = 0; - op_errno = afr_final_errno (frame->local, this->private); + op_errno = afr_final_errno(frame->local, this->private); - if (local->op_ret < 
0) { - AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, - NULL, NULL); - return; - } + if (local->op_ret < 0) { + AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return; + } - if (!afr_has_quorum (success_replies, this)) - goto unwind; + if (!afr_has_quorum(success_replies, this)) + goto unwind; - afr_replies_interpret (frame, this, local->inode, NULL); + afr_replies_interpret(frame, this, local->inode, NULL); - read_subvol = afr_read_subvol_decide (local->inode, this, NULL, - data_readable); + read_subvol = afr_read_subvol_decide(local->inode, this, NULL, + data_readable); unwind: - afr_attempt_readsubvol_set (frame, this, success_replies, data_readable, - &read_subvol); - if (read_subvol == -1) { - AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - NULL, NULL, NULL, NULL); - return; - } + afr_attempt_readsubvol_set(frame, this, success_replies, data_readable, + &read_subvol); + if (read_subvol == -1) { + AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, + NULL, NULL, NULL); + return; + } - if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) { - local->op_ret = -1; - local->op_errno = ENOTCONN; - gf_msg_debug (this->name, 0, "Arbiter cannot be a read subvol " - "for %s", local->loc.path); - } + if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) { + local->op_ret = -1; + local->op_errno = ENOTCONN; + gf_msg_debug(this->name, 0, + "Arbiter cannot be a read subvol " + "for %s", + local->loc.path); + } - AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->replies[read_subvol].poststat, - local->replies[read_subvol].xdata, - &local->replies[read_subvol].postparent); + AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->replies[read_subvol].poststat, + local->replies[read_subvol].xdata, + &local->replies[read_subvol].postparent); } static int -afr_ta_id_file_check (void *opaque) -{ - afr_private_t *priv = NULL; - xlator_t *this = NULL; - loc_t loc = {0, }; - struct iatt stbuf = {0,}; - dict_t *dict = NULL; - uuid_t gfid = {0,}; - fd_t *fd = NULL; - int ret = 0; - - this = opaque; - priv = this->private; - - ret = afr_fill_ta_loc (this, &loc); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to populate thin-arbiter loc for: %s.", - loc.name); - goto out; - } - - ret = syncop_lookup (priv->children[THIN_ARBITER_BRICK_INDEX], &loc, - &stbuf, 0, 0, 0); - if (ret == 0) { - goto out; - } else if (ret == -ENOENT) { - fd = fd_create (loc.inode, getpid()); - if (!fd) - goto out; - dict = dict_new (); - if (!dict) - goto out; - gf_uuid_generate (gfid); - ret = dict_set_gfuuid (dict, "gfid-req", gfid, true); - ret = syncop_create (priv->children[THIN_ARBITER_BRICK_INDEX], - &loc, O_RDWR, 0664, fd, &stbuf, dict, - NULL); - } +afr_ta_id_file_check(void *opaque) +{ + afr_private_t *priv = NULL; + xlator_t *this = NULL; + loc_t loc = { + 0, + }; + struct iatt stbuf = { + 0, + }; + dict_t *dict = NULL; + uuid_t gfid = { + 0, + }; + fd_t *fd = NULL; + int ret = 0; + + this = opaque; + priv = this->private; + + ret = afr_fill_ta_loc(this, &loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to populate thin-arbiter loc for: %s.", loc.name); + goto out; + } + + ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, &stbuf, + 0, 0, 0); + if (ret == 0) { + goto out; + } else if (ret == -ENOENT) { + fd = fd_create(loc.inode, getpid()); + if (!fd) + goto out; + 
dict = dict_new(); + if (!dict) + goto out; + gf_uuid_generate(gfid); + ret = dict_set_gfuuid(dict, "gfid-req", gfid, true); + ret = syncop_create(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, + O_RDWR, 0664, fd, &stbuf, dict, NULL); + } out: - if (ret == 0) { - gf_uuid_copy (priv->ta_gfid, stbuf.ia_gfid); - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to lookup/create thin-arbiter id file."); - } - if (dict) - dict_unref (dict); - if (fd) - fd_unref (fd); - loc_wipe (&loc); + if (ret == 0) { + gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid); + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to lookup/create thin-arbiter id file."); + } + if (dict) + dict_unref(dict); + if (fd) + fd_unref(fd); + loc_wipe(&loc); - return 0; + return 0; } static int -afr_ta_id_file_check_cbk (int ret, call_frame_t *ta_frame, void *opaque) +afr_ta_id_file_check_cbk(int ret, call_frame_t *ta_frame, void *opaque) { - return 0; + return 0; } static void -afr_discover_done (call_frame_t *frame, xlator_t *this) +afr_discover_done(call_frame_t *frame, xlator_t *this) { - int ret = 0; - afr_private_t *priv = NULL; + int ret = 0; + afr_private_t *priv = NULL; - priv = this->private; - if (!priv->thin_arbiter_count) - goto unwind; - if (!gf_uuid_is_null(priv->ta_gfid)) - goto unwind; + priv = this->private; + if (!priv->thin_arbiter_count) + goto unwind; + if (!gf_uuid_is_null(priv->ta_gfid)) + goto unwind; - ret = synctask_new (this->ctx->env, afr_ta_id_file_check, - afr_ta_id_file_check_cbk, NULL, this); - if (ret) - goto unwind; + ret = synctask_new(this->ctx->env, afr_ta_id_file_check, + afr_ta_id_file_check_cbk, NULL, this); + if (ret) + goto unwind; unwind: - afr_discover_unwind (frame, this); + afr_discover_unwind(frame, this); } int -afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, struct iatt *buf, - dict_t *xdata, struct iatt *postparent) -{ - afr_local_t * local = NULL; - int call_count = -1; - int child_index = -1; - GF_UNUSED int ret = 0; - int8_t need_heal = 1; - - child_index = (long) cookie; - - local = frame->local; - - local->replies[child_index].valid = 1; - local->replies[child_index].op_ret = op_ret; - local->replies[child_index].op_errno = op_errno; - if (op_ret != -1) { - local->replies[child_index].poststat = *buf; - local->replies[child_index].postparent = *postparent; - if (xdata) - local->replies[child_index].xdata = dict_ref (xdata); - } - - if (local->do_discovery && (op_ret == 0)) - afr_attempt_local_discovery (this, child_index); - - if (xdata) { - ret = dict_get_int8 (xdata, "link-count", &need_heal); - local->replies[child_index].need_heal = need_heal; - } else { - local->replies[child_index].need_heal = need_heal; - } - - call_count = afr_frame_return (frame); - if (call_count == 0) { - afr_set_need_heal (this, local); - afr_lookup_metadata_heal_check (frame, this); - } +afr_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + afr_local_t *local = NULL; + int call_count = -1; + int child_index = -1; + GF_UNUSED int ret = 0; + int8_t need_heal = 1; + + child_index = (long)cookie; + + local = frame->local; + + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + if (op_ret != -1) { + local->replies[child_index].poststat = *buf; + 
local->replies[child_index].postparent = *postparent; + if (xdata) + local->replies[child_index].xdata = dict_ref(xdata); + } + + if (local->do_discovery && (op_ret == 0)) + afr_attempt_local_discovery(this, child_index); + + if (xdata) { + ret = dict_get_int8(xdata, "link-count", &need_heal); + local->replies[child_index].need_heal = need_heal; + } else { + local->replies[child_index].need_heal = need_heal; + } + + call_count = afr_frame_return(frame); + if (call_count == 0) { + afr_set_need_heal(this, local); + afr_lookup_metadata_heal_check(frame, this); + } - return 0; + return 0; } - int -afr_discover_do (call_frame_t *frame, xlator_t *this, int err) -{ - int ret = 0; - int i = 0; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - - local = frame->local; - priv = this->private; - - if (err) { - local->op_errno = err; - goto out; - } - - call_count = local->call_count = AFR_COUNT (local->child_up, - priv->child_count); - - ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req, - &local->loc); - if (ret) { - local->op_errno = -ret; - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_discover_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->lookup, - &local->loc, local->xattr_req); - if (!--call_count) - break; - } +afr_discover_do(call_frame_t *frame, xlator_t *this, int err) +{ + int ret = 0; + int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + + local = frame->local; + priv = this->private; + + if (err) { + local->op_errno = err; + goto out; + } + + call_count = local->call_count = AFR_COUNT(local->child_up, + priv->child_count); + + ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req, + &local->loc); + if (ret) { + local->op_errno = -ret; + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE( + frame, afr_discover_cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->lookup, &local->loc, local->xattr_req); + if (!--call_count) + break; } + } - return 0; + return 0; out: - AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0); - return 0; + AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0); + return 0; } - int -afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - int op_errno = ENOMEM; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int event = 0; + int op_errno = ENOMEM; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int event = 0; - priv = this->private; + priv = this->private; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - if (!local->call_count) { - op_errno = ENOTCONN; - goto out; - } + if (!local->call_count) { + op_errno = ENOTCONN; + goto out; + } - if (__is_root_gfid (loc->inode->gfid)) { - if (!this->itable) - this->itable = loc->inode->table; - if (!priv->root_inode) - priv->root_inode = inode_ref (loc->inode); - - if (priv->choose_local && !priv->did_discovery) { - /* Logic to detect which subvolumes of AFR are - local, in order to prefer them for reads - */ - local->do_discovery = _gf_true; - priv->did_discovery = _gf_true; - } - } + if (__is_root_gfid(loc->inode->gfid)) { + if (!this->itable) + this->itable = loc->inode->table; + if (!priv->root_inode) 
+ priv->root_inode = inode_ref(loc->inode); + + if (priv->choose_local && !priv->did_discovery) { + /* Logic to detect which subvolumes of AFR are + local, in order to prefer them for reads + */ + local->do_discovery = _gf_true; + priv->did_discovery = _gf_true; + } + } - local->op = GF_FOP_LOOKUP; + local->op = GF_FOP_LOOKUP; - loc_copy (&local->loc, loc); + loc_copy(&local->loc, loc); - local->inode = inode_ref (loc->inode); + local->inode = inode_ref(loc->inode); - if (xattr_req) - /* If xattr_req was null, afr_lookup_xattr_req_prepare() will - allocate one for us */ - local->xattr_req = dict_ref (xattr_req); + if (xattr_req) + /* If xattr_req was null, afr_lookup_xattr_req_prepare() will + allocate one for us */ + local->xattr_req = dict_ref(xattr_req); - if (gf_uuid_is_null (loc->inode->gfid)) { - afr_discover_do (frame, this, 0); - return 0; - } + if (gf_uuid_is_null(loc->inode->gfid)) { + afr_discover_do(frame, this, 0); + return 0; + } - afr_read_subvol_get (loc->inode, this, NULL, NULL, &event, - AFR_DATA_TRANSACTION, NULL); + afr_read_subvol_get(loc->inode, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); - if (afr_is_inode_refresh_reqd (loc->inode, this, event, - local->event_generation)) - afr_inode_refresh (frame, this, loc->inode, NULL, - afr_discover_do); - else - afr_discover_do (frame, this, 0); + if (afr_is_inode_refresh_reqd(loc->inode, this, event, + local->event_generation)) + afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do); + else + afr_discover_do(frame, this, 0); - return 0; + return 0; out: - AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; } - int -afr_lookup_do (call_frame_t *frame, xlator_t *this, int err) -{ - int ret = 0; - int i = 0; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - - local = frame->local; - priv = this->private; - - if (err < 0) { - local->op_errno = err; - ret = -1; - goto out; - } - - call_count = local->call_count = AFR_COUNT (local->child_up, - priv->child_count); - - ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req, - &local->loc); - if (ret) { - local->op_errno = -ret; - ret = -1; - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_lookup_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->lookup, - &local->loc, local->xattr_req); - if (!--call_count) - break; - } +afr_lookup_do(call_frame_t *frame, xlator_t *this, int err) +{ + int ret = 0; + int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + + local = frame->local; + priv = this->private; + + if (err < 0) { + local->op_errno = err; + ret = -1; + goto out; + } + + call_count = local->call_count = AFR_COUNT(local->child_up, + priv->child_count); + + ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req, + &local->loc); + if (ret) { + local->op_errno = -ret; + ret = -1; + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE( + frame, afr_lookup_cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->lookup, &local->loc, local->xattr_req); + if (!--call_count) + break; } - return 0; + } + return 0; out: - AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0); - return 0; + AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0); + return 0; } /* @@ -3405,1619 
+3335,1568 @@ out: */ int -afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) -{ - afr_local_t *local = NULL; - int32_t op_errno = 0; - int event = 0; - int ret = 0; - - if (loc_is_nameless (loc)) { - if (xattr_req) - dict_del (xattr_req, "gfid-req"); - afr_discover (frame, this, loc, xattr_req); - return 0; - } - - if (__is_root_gfid (loc->parent->gfid)) { - if (!strcmp (loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } - } - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - if (!local->call_count) { - op_errno = ENOTCONN; - goto out; +afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +{ + afr_local_t *local = NULL; + int32_t op_errno = 0; + int event = 0; + int ret = 0; + + if (loc_is_nameless(loc)) { + if (xattr_req) + dict_del(xattr_req, "gfid-req"); + afr_discover(frame, this, loc, xattr_req); + return 0; + } + + if (__is_root_gfid(loc->parent->gfid)) { + if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { + op_errno = EPERM; + goto out; } + } - local->op = GF_FOP_LOOKUP; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - loc_copy (&local->loc, loc); + if (!local->call_count) { + op_errno = ENOTCONN; + goto out; + } - local->inode = inode_ref (loc->inode); + local->op = GF_FOP_LOOKUP; - if (xattr_req) { - /* If xattr_req was null, afr_lookup_xattr_req_prepare() will - allocate one for us */ - local->xattr_req = dict_copy_with_ref (xattr_req, NULL); - if (!local->xattr_req) { - op_errno = ENOMEM; - goto out; - } - ret = dict_get_gfuuid (local->xattr_req, "gfid-req", - &local->cont.lookup.gfid_req); - if (ret == 0) { - dict_del (local->xattr_req, "gfid-req"); - } + loc_copy(&local->loc, loc); + + local->inode = inode_ref(loc->inode); + + if (xattr_req) { + /* If xattr_req was null, afr_lookup_xattr_req_prepare() will + allocate one for us */ + local->xattr_req = dict_copy_with_ref(xattr_req, NULL); + if (!local->xattr_req) { + op_errno = ENOMEM; + goto out; + } + ret = dict_get_gfuuid(local->xattr_req, "gfid-req", + &local->cont.lookup.gfid_req); + if (ret == 0) { + dict_del(local->xattr_req, "gfid-req"); } + } - afr_read_subvol_get (loc->parent, this, NULL, NULL, &event, - AFR_DATA_TRANSACTION, NULL); + afr_read_subvol_get(loc->parent, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); - if (afr_is_inode_refresh_reqd (loc->inode, this, event, - local->event_generation)) - afr_inode_refresh (frame, this, loc->parent, NULL, - afr_lookup_do); - else - afr_lookup_do (frame, this, 0); + if (afr_is_inode_refresh_reqd(loc->inode, this, event, + local->event_generation)) + afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do); + else + afr_lookup_do(frame, this, 0); - return 0; + return 0; out: - AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; + return 0; } void -_afr_cleanup_fd_ctx (afr_fd_ctx_t *fd_ctx) +_afr_cleanup_fd_ctx(afr_fd_ctx_t *fd_ctx) { - GF_FREE (fd_ctx->opened_on); - GF_FREE (fd_ctx); - return; + GF_FREE(fd_ctx->opened_on); + GF_FREE(fd_ctx); + return; } int -afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) +afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd) { - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - int ret = 0; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int ret = 0; - ret = fd_ctx_get (fd, this, &ctx); - if (ret < 0) - goto out; + ret = fd_ctx_get(fd, this, &ctx); + if (ret < 0) + goto out; - fd_ctx = (afr_fd_ctx_t 
*)(long) ctx; + fd_ctx = (afr_fd_ctx_t *)(long)ctx; - if (fd_ctx) { - _afr_cleanup_fd_ctx (fd_ctx); - } + if (fd_ctx) { + _afr_cleanup_fd_ctx(fd_ctx); + } out: - return 0; + return 0; } int -afr_release (xlator_t *this, fd_t *fd) +afr_release(xlator_t *this, fd_t *fd) { - afr_cleanup_fd_ctx (this, fd); + afr_cleanup_fd_ctx(this, fd); - return 0; + return 0; } afr_fd_ctx_t * -__afr_fd_ctx_get (fd_t *fd, xlator_t *this) +__afr_fd_ctx_get(fd_t *fd, xlator_t *this) { - uint64_t ctx = 0; - int ret = 0; - afr_fd_ctx_t *fd_ctx = NULL; + uint64_t ctx = 0; + int ret = 0; + afr_fd_ctx_t *fd_ctx = NULL; - ret = __fd_ctx_get (fd, this, &ctx); + ret = __fd_ctx_get(fd, this, &ctx); - if (ret < 0) { - ret = __afr_fd_ctx_set (this, fd); - if (ret < 0) - goto out; + if (ret < 0) { + ret = __afr_fd_ctx_set(this, fd); + if (ret < 0) + goto out; - ret = __fd_ctx_get (fd, this, &ctx); - if (ret < 0) - goto out; - } + ret = __fd_ctx_get(fd, this, &ctx); + if (ret < 0) + goto out; + } - fd_ctx = (afr_fd_ctx_t *)(long) ctx; + fd_ctx = (afr_fd_ctx_t *)(long)ctx; out: - return fd_ctx; + return fd_ctx; } - afr_fd_ctx_t * -afr_fd_ctx_get (fd_t *fd, xlator_t *this) +afr_fd_ctx_get(fd_t *fd, xlator_t *this) { - afr_fd_ctx_t *fd_ctx = NULL; + afr_fd_ctx_t *fd_ctx = NULL; - LOCK(&fd->lock); - { - fd_ctx = __afr_fd_ctx_get (fd, this); - } - UNLOCK(&fd->lock); + LOCK(&fd->lock); + { + fd_ctx = __afr_fd_ctx_get(fd, this); + } + UNLOCK(&fd->lock); - return fd_ctx; + return fd_ctx; } - int -__afr_fd_ctx_set (xlator_t *this, fd_t *fd) +__afr_fd_ctx_set(xlator_t *this, fd_t *fd) { - afr_private_t * priv = NULL; - int ret = -1; - uint64_t ctx = 0; - afr_fd_ctx_t * fd_ctx = NULL; - int i = 0; + afr_private_t *priv = NULL; + int ret = -1; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int i = 0; - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (fd, out); + VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(fd, out); - priv = this->private; + priv = this->private; - ret = __fd_ctx_get (fd, this, &ctx); + ret = __fd_ctx_get(fd, this, &ctx); - if (ret == 0) - goto out; + if (ret == 0) + goto out; - fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t), - gf_afr_mt_afr_fd_ctx_t); - if (!fd_ctx) { - ret = -ENOMEM; - goto out; - } + fd_ctx = GF_CALLOC(1, sizeof(afr_fd_ctx_t), gf_afr_mt_afr_fd_ctx_t); + if (!fd_ctx) { + ret = -ENOMEM; + goto out; + } - fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on), - priv->child_count, - gf_afr_mt_int32_t); - if (!fd_ctx->opened_on) { - ret = -ENOMEM; - goto out; - } + fd_ctx->opened_on = GF_CALLOC(sizeof(*fd_ctx->opened_on), priv->child_count, + gf_afr_mt_int32_t); + if (!fd_ctx->opened_on) { + ret = -ENOMEM; + goto out; + } - for (i = 0; i < priv->child_count; i++) { - if (fd_is_anonymous (fd)) - fd_ctx->opened_on[i] = AFR_FD_OPENED; - else - fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED; - } + for (i = 0; i < priv->child_count; i++) { + if (fd_is_anonymous(fd)) + fd_ctx->opened_on[i] = AFR_FD_OPENED; + else + fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED; + } - fd_ctx->readdir_subvol = -1; + fd_ctx->readdir_subvol = -1; - ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx); - if (ret) - gf_msg_debug (this->name, 0, - "failed to set fd ctx (%p)", fd); + ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx); + if (ret) + gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd); out: - if (ret && fd_ctx) - _afr_cleanup_fd_ctx (fd_ctx); - return ret; + if (ret && fd_ctx) + _afr_cleanup_fd_ctx(fd_ctx); + return ret; } - /* {{{ flush */ int -afr_flush_cbk (call_frame_t *frame, void 
*cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; + afr_local_t *local = NULL; + int call_count = -1; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (op_ret != -1) { - local->op_ret = op_ret; - if (!local->xdata_rsp && xdata) - local->xdata_rsp = dict_ref (xdata); - } else { - local->op_errno = op_errno; - } + LOCK(&frame->lock); + { + if (op_ret != -1) { + local->op_ret = op_ret; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref(xdata); + } else { + local->op_errno = op_errno; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - call_count = afr_frame_return (frame); + call_count = afr_frame_return(frame); - if (call_count == 0) - AFR_STACK_UNWIND (flush, frame, local->op_ret, - local->op_errno, local->xdata_rsp); + if (call_count == 0) + AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno, + local->xdata_rsp); - return 0; + return 0; } static int -afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - int i = 0; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - - priv = this->private; - local = frame->local; - call_count = local->call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_flush_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->flush, - local->fd, xdata); - if (!--call_count) - break; - - } +afr_flush_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + + priv = this->private; + local = frame->local; + call_count = local->call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE(frame, afr_flush_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->flush, + local->fd, xdata); + if (!--call_count) + break; } + } - return 0; + return 0; } -afr_local_t* -afr_wakeup_same_fd_delayed_op (xlator_t *this, afr_lock_t *lock, fd_t *fd) +afr_local_t * +afr_wakeup_same_fd_delayed_op(xlator_t *this, afr_lock_t *lock, fd_t *fd) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - if (lock->delay_timer) { - local = list_entry(lock->post_op.next, afr_local_t, - transaction.owner_list); - if (fd == local->fd) { - if (gf_timer_call_cancel (this->ctx, - lock->delay_timer)) { - local = NULL; - } else { - lock->delay_timer = NULL; - } - } else { - local = NULL; - } + if (lock->delay_timer) { + local = list_entry(lock->post_op.next, afr_local_t, + transaction.owner_list); + if (fd == local->fd) { + if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) { + local = NULL; + } else { + lock->delay_timer = NULL; + } + } else { + local = NULL; } + } - return local; + return local; } void -afr_delayed_changelog_wake_resume (xlator_t *this, inode_t *inode, - call_stub_t *stub) -{ - afr_inode_ctx_t *ctx = NULL; - afr_lock_t *lock = NULL; - afr_local_t *metadata_local = NULL; - afr_local_t *data_local = NULL; - LOCK (&inode->lock); - { - (void)__afr_inode_ctx_get (this, inode, &ctx); - lock = &ctx->lock[AFR_DATA_TRANSACTION]; - data_local = afr_wakeup_same_fd_delayed_op (this, lock, - stub->args.fd); - lock = &ctx->lock[AFR_METADATA_TRANSACTION]; - metadata_local = afr_wakeup_same_fd_delayed_op (this, lock, - 
stub->args.fd); - } - UNLOCK (&inode->lock); - - if (data_local) { - data_local->transaction.resume_stub = stub; - } else if (metadata_local) { - metadata_local->transaction.resume_stub = stub; - } else { - call_resume (stub); - } - if (data_local) { - afr_delayed_changelog_wake_up_cbk (data_local); - } - if (metadata_local) { - afr_delayed_changelog_wake_up_cbk (metadata_local); - } +afr_delayed_changelog_wake_resume(xlator_t *this, inode_t *inode, + call_stub_t *stub) +{ + afr_inode_ctx_t *ctx = NULL; + afr_lock_t *lock = NULL; + afr_local_t *metadata_local = NULL; + afr_local_t *data_local = NULL; + LOCK(&inode->lock); + { + (void)__afr_inode_ctx_get(this, inode, &ctx); + lock = &ctx->lock[AFR_DATA_TRANSACTION]; + data_local = afr_wakeup_same_fd_delayed_op(this, lock, stub->args.fd); + lock = &ctx->lock[AFR_METADATA_TRANSACTION]; + metadata_local = afr_wakeup_same_fd_delayed_op(this, lock, + stub->args.fd); + } + UNLOCK(&inode->lock); + + if (data_local) { + data_local->transaction.resume_stub = stub; + } else if (metadata_local) { + metadata_local->transaction.resume_stub = stub; + } else { + call_resume(stub); + } + if (data_local) { + afr_delayed_changelog_wake_up_cbk(data_local); + } + if (metadata_local) { + afr_delayed_changelog_wake_up_cbk(metadata_local); + } } int -afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - afr_local_t *local = NULL; - call_stub_t *stub = NULL; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_stub_t *stub = NULL; + int op_errno = ENOMEM; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_FLUSH; - if (!afr_is_consistent_io_possible (local, this->private, &op_errno)) - goto out; + local->op = GF_FOP_FLUSH; + if (!afr_is_consistent_io_possible(local, this->private, &op_errno)) + goto out; - local->fd = fd_ref(fd); + local->fd = fd_ref(fd); - stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata); - if (!stub) - goto out; + stub = fop_flush_stub(frame, afr_flush_wrapper, fd, xdata); + if (!stub) + goto out; - afr_delayed_changelog_wake_resume (this, fd->inode, stub); + afr_delayed_changelog_wake_resume(this, fd->inode, stub); - return 0; + return 0; out: - AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL); - return 0; + AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL); + return 0; } int -afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; + afr_local_t *local = NULL; + int call_count = -1; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (op_ret == 0) { - local->op_ret = 0; - if (!local->xdata_rsp && xdata) - local->xdata_rsp = dict_ref (xdata); - } else { - local->op_errno = op_errno; - } + LOCK(&frame->lock); + { + if (op_ret == 0) { + local->op_ret = 0; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref(xdata); + } else { + local->op_errno = op_errno; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - call_count = afr_frame_return (frame); + call_count = afr_frame_return(frame); - if (call_count == 0) - AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret, - local->op_errno, local->xdata_rsp); + if (call_count == 0) + AFR_STACK_UNWIND(fsyncdir, 
frame, local->op_ret, local->op_errno, + local->xdata_rsp); - return 0; + return 0; } - int -afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, - dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int32_t call_count = 0; - int32_t op_errno = ENOMEM; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_FSYNCDIR; - if (!afr_is_consistent_io_possible (local, priv, &op_errno)) - goto out; - - call_count = local->call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_fsyncdir_cbk, - priv->children[i], - priv->children[i]->fops->fsyncdir, - fd, datasync, xdata); - if (!--call_count) - break; - } +afr_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; + int32_t call_count = 0; + int32_t op_errno = ENOMEM; + + priv = this->private; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->op = GF_FOP_FSYNCDIR; + if (!afr_is_consistent_io_possible(local, priv, &op_errno)) + goto out; + + call_count = local->call_count; + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND(frame, afr_fsyncdir_cbk, priv->children[i], + priv->children[i]->fops->fsyncdir, fd, datasync, xdata); + if (!--call_count) + break; } + } - return 0; + return 0; out: - AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL); + AFR_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL); - return 0; + return 0; } /* }}} */ static int -afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this); +afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this); static gf_boolean_t -afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno) +afr_is_conflicting_lock_present(int32_t op_ret, int32_t op_errno) { - if (op_ret == -1 && op_errno == EAGAIN) - return _gf_true; - return _gf_false; + if (op_ret == -1 && op_errno == EAGAIN) + return _gf_true; + return _gf_false; } static void -afr_fop_lock_unwind (call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +afr_fop_lock_unwind(call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - switch (op) { + switch (op) { case GF_FOP_INODELK: - AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata); - break; + AFR_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata); + break; case GF_FOP_FINODELK: - AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata); - break; + AFR_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata); + break; case GF_FOP_ENTRYLK: - AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata); - break; + AFR_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata); + break; case GF_FOP_FENTRYLK: - AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata); - break; + AFR_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, xdata); + break; default: - break; - } + break; + } } static void -afr_fop_lock_wind (call_frame_t *frame, xlator_t *this, int child_index, - int32_t (*lock_cbk) (call_frame_t *, void *, xlator_t *, - int32_t, int32_t, dict_t *)) +afr_fop_lock_wind(call_frame_t *frame, xlator_t *this, int child_index, + int32_t (*lock_cbk)(call_frame_t *, void *, xlator_t *, + int32_t, int32_t, dict_t *)) { - afr_local_t *local = frame->local; - afr_private_t *priv = this->private; - int i = child_index; + afr_local_t *local = 
frame->local; + afr_private_t *priv = this->private; + int i = child_index; - switch (local->op) { + switch (local->op) { case GF_FOP_INODELK: - STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - (const char *)local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->cont.inodelk.xdata); - break; + STACK_WIND_COOKIE( + frame, lock_cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->inodelk, + (const char *)local->cont.inodelk.volume, &local->loc, + local->cont.inodelk.cmd, &local->cont.inodelk.flock, + local->cont.inodelk.xdata); + break; case GF_FOP_FINODELK: - STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->finodelk, - (const char *)local->cont.inodelk.volume, - local->fd, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->cont.inodelk.xdata); - break; + STACK_WIND_COOKIE( + frame, lock_cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->finodelk, + (const char *)local->cont.inodelk.volume, local->fd, + local->cont.inodelk.cmd, &local->cont.inodelk.flock, + local->cont.inodelk.xdata); + break; case GF_FOP_ENTRYLK: - STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - local->cont.entrylk.volume, &local->loc, - local->cont.entrylk.basename, - local->cont.entrylk.cmd, - local->cont.entrylk.type, - local->cont.entrylk.xdata); - break; + STACK_WIND_COOKIE( + frame, lock_cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->entrylk, local->cont.entrylk.volume, + &local->loc, local->cont.entrylk.basename, + local->cont.entrylk.cmd, local->cont.entrylk.type, + local->cont.entrylk.xdata); + break; case GF_FOP_FENTRYLK: - STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fentrylk, - local->cont.entrylk.volume, local->fd, - local->cont.entrylk.basename, - local->cont.entrylk.cmd, - local->cont.entrylk.type, - local->cont.entrylk.xdata); - break; + STACK_WIND_COOKIE( + frame, lock_cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->fentrylk, local->cont.entrylk.volume, + local->fd, local->cont.entrylk.basename, + local->cont.entrylk.cmd, local->cont.entrylk.type, + local->cont.entrylk.xdata); + break; default: - break; - } + break; + } } void -afr_fop_lock_proceed (call_frame_t *frame) +afr_fop_lock_proceed(call_frame_t *frame) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = frame->this->private; + local = frame->local; + priv = frame->this->private; - if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) { - afr_fop_lock_unwind (frame, local->op, local->op_ret, - local->op_errno, local->xdata_rsp); - return; - } - /* At least one child is up */ - /* - * Non-blocking locks also need to be serialized. Otherwise there is - * a chance that both the mounts which issued same non-blocking inodelk - * may endup not acquiring the lock on any-brick. - * Ex: Mount1 and Mount2 - * request for full length lock on file f1. Mount1 afr may acquire the - * partial lock on brick-1 and may not acquire the lock on brick-2 - * because Mount2 already got the lock on brick-2, vice versa. Since - * both the mounts only got partial locks, afr treats them as failure in - * gaining the locks and unwinds with EAGAIN errno. 
- */ - local->op_ret = -1; - local->op_errno = EUCLEAN; - local->fop_lock_state = AFR_FOP_LOCK_SERIAL; - afr_local_replies_wipe (local, priv); - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); - local->xdata_rsp = NULL; - switch (local->op) { + if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) { + afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno, + local->xdata_rsp); + return; + } + /* At least one child is up */ + /* + * Non-blocking locks also need to be serialized. Otherwise there is + * a chance that both the mounts which issued same non-blocking inodelk + * may endup not acquiring the lock on any-brick. + * Ex: Mount1 and Mount2 + * request for full length lock on file f1. Mount1 afr may acquire the + * partial lock on brick-1 and may not acquire the lock on brick-2 + * because Mount2 already got the lock on brick-2, vice versa. Since + * both the mounts only got partial locks, afr treats them as failure in + * gaining the locks and unwinds with EAGAIN errno. + */ + local->op_ret = -1; + local->op_errno = EUCLEAN; + local->fop_lock_state = AFR_FOP_LOCK_SERIAL; + afr_local_replies_wipe(local, priv); + if (local->xdata_rsp) + dict_unref(local->xdata_rsp); + local->xdata_rsp = NULL; + switch (local->op) { case GF_FOP_INODELK: case GF_FOP_FINODELK: - local->cont.inodelk.cmd = local->cont.inodelk.in_cmd; - local->cont.inodelk.flock = local->cont.inodelk.in_flock; - if (local->cont.inodelk.xdata) - dict_unref (local->cont.inodelk.xdata); - local->cont.inodelk.xdata = NULL; - if (local->xdata_req) - local->cont.inodelk.xdata = dict_ref (local->xdata_req); - break; + local->cont.inodelk.cmd = local->cont.inodelk.in_cmd; + local->cont.inodelk.flock = local->cont.inodelk.in_flock; + if (local->cont.inodelk.xdata) + dict_unref(local->cont.inodelk.xdata); + local->cont.inodelk.xdata = NULL; + if (local->xdata_req) + local->cont.inodelk.xdata = dict_ref(local->xdata_req); + break; case GF_FOP_ENTRYLK: case GF_FOP_FENTRYLK: - local->cont.entrylk.cmd = local->cont.entrylk.in_cmd; - if (local->cont.entrylk.xdata) - dict_unref (local->cont.entrylk.xdata); - local->cont.entrylk.xdata = NULL; - if (local->xdata_req) - local->cont.entrylk.xdata = dict_ref (local->xdata_req); - break; + local->cont.entrylk.cmd = local->cont.entrylk.in_cmd; + if (local->cont.entrylk.xdata) + dict_unref(local->cont.entrylk.xdata); + local->cont.entrylk.xdata = NULL; + if (local->xdata_req) + local->cont.entrylk.xdata = dict_ref(local->xdata_req); + break; default: - break; - } - afr_serialized_lock_wind (frame, frame->this); + break; + } + afr_serialized_lock_wind(frame, frame->this); } static int32_t -afr_unlock_partial_lock_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +afr_unlock_partial_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - int child_index = (long)cookie; - uuid_t gfid = {0}; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int child_index = (long)cookie; + uuid_t gfid = {0}; - local = frame->local; - priv = this->private; - - if (op_ret < 0 && op_errno != ENOTCONN) { - if (local->fd) - gf_uuid_copy (gfid, local->fd->inode->gfid); - else - loc_gfid (&local->loc, gfid); - gf_msg (this->name, GF_LOG_ERROR, op_errno, - AFR_MSG_UNLOCK_FAIL, - "%s: Failed to unlock %s on %s " - "with lk_owner: %s", uuid_utoa (gfid), - gf_fop_list[local->op], - 
priv->children[child_index]->name, - lkowner_utoa (&frame->root->lk_owner)); - } + local = frame->local; + priv = this->private; - call_count = afr_frame_return (frame); - if (call_count == 0) - afr_fop_lock_proceed (frame); + if (op_ret < 0 && op_errno != ENOTCONN) { + if (local->fd) + gf_uuid_copy(gfid, local->fd->inode->gfid); + else + loc_gfid(&local->loc, gfid); + gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL, + "%s: Failed to unlock %s on %s " + "with lk_owner: %s", + uuid_utoa(gfid), gf_fop_list[local->op], + priv->children[child_index]->name, + lkowner_utoa(&frame->root->lk_owner)); + } - return 0; + call_count = afr_frame_return(frame); + if (call_count == 0) + afr_fop_lock_proceed(frame); + + return 0; } static int32_t -afr_unlock_locks_and_proceed (call_frame_t *frame, xlator_t *this, +afr_unlock_locks_and_proceed(call_frame_t *frame, xlator_t *this, int call_count) { - int i = 0; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; + int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; - if (call_count == 0) { - afr_fop_lock_proceed (frame); - goto out; - } + if (call_count == 0) { + afr_fop_lock_proceed(frame); + goto out; + } - local = frame->local; - priv = this->private; - local->call_count = call_count; - switch (local->op) { + local = frame->local; + priv = this->private; + local->call_count = call_count; + switch (local->op) { case GF_FOP_INODELK: case GF_FOP_FINODELK: - local->cont.inodelk.flock.l_type = F_UNLCK; - local->cont.inodelk.cmd = F_SETLK; - if (local->cont.inodelk.xdata) - dict_unref (local->cont.inodelk.xdata); - local->cont.inodelk.xdata = NULL; - break; + local->cont.inodelk.flock.l_type = F_UNLCK; + local->cont.inodelk.cmd = F_SETLK; + if (local->cont.inodelk.xdata) + dict_unref(local->cont.inodelk.xdata); + local->cont.inodelk.xdata = NULL; + break; case GF_FOP_ENTRYLK: case GF_FOP_FENTRYLK: - local->cont.entrylk.cmd = ENTRYLK_UNLOCK; - if (local->cont.entrylk.xdata) - dict_unref (local->cont.entrylk.xdata); - local->cont.entrylk.xdata = NULL; - break; + local->cont.entrylk.cmd = ENTRYLK_UNLOCK; + if (local->cont.entrylk.xdata) + dict_unref(local->cont.entrylk.xdata); + local->cont.entrylk.xdata = NULL; + break; default: - break; - } + break; + } - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; - if (local->replies[i].op_ret == -1) - continue; + if (local->replies[i].op_ret == -1) + continue; - afr_fop_lock_wind (frame, this, i, afr_unlock_partial_lock_cbk); + afr_fop_lock_wind(frame, this, i, afr_unlock_partial_lock_cbk); - if (!--call_count) - break; - } + if (!--call_count) + break; + } out: - return 0; + return 0; } int32_t -afr_fop_lock_done (call_frame_t *frame, xlator_t *this) +afr_fop_lock_done(call_frame_t *frame, xlator_t *this) { - int i = 0; - int lock_count = 0; - unsigned char *success = NULL; + int i = 0; + int lock_count = 0; + unsigned char *success = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; - success = alloca0(priv->child_count); + local = frame->local; + priv = this->private; + success = alloca0(priv->child_count); - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; - if (local->replies[i].op_ret == 0) { - lock_count++; - 
success[i] = 1; - } + if (local->replies[i].op_ret == 0) { + lock_count++; + success[i] = 1; + } - if (local->op_ret == -1 && local->op_errno == EAGAIN) - continue; + if (local->op_ret == -1 && local->op_errno == EAGAIN) + continue; - if ((local->replies[i].op_ret == -1) && - (local->replies[i].op_errno == EAGAIN)) { - local->op_ret = -1; - local->op_errno = EAGAIN; - continue; - } + if ((local->replies[i].op_ret == -1) && + (local->replies[i].op_errno == EAGAIN)) { + local->op_ret = -1; + local->op_errno = EAGAIN; + continue; + } - if (local->replies[i].op_ret == 0) - local->op_ret = 0; + if (local->replies[i].op_ret == 0) + local->op_ret = 0; - local->op_errno = local->replies[i].op_errno; - } + local->op_errno = local->replies[i].op_errno; + } - if (afr_fop_lock_is_unlock (frame)) - goto unwind; - - if (afr_is_conflicting_lock_present (local->op_ret, local->op_errno)) { - afr_unlock_locks_and_proceed (frame, this, lock_count); - } else if (priv->quorum_count && !afr_has_quorum (success, this)) { - local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED; - local->op_ret = -1; - local->op_errno = afr_final_errno (local, priv); - if (local->op_errno == 0) - local->op_errno = afr_quorum_errno (priv); - afr_unlock_locks_and_proceed (frame, this, lock_count); - } else { - goto unwind; - } + if (afr_fop_lock_is_unlock(frame)) + goto unwind; - return 0; + if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) { + afr_unlock_locks_and_proceed(frame, this, lock_count); + } else if (priv->quorum_count && !afr_has_quorum(success, this)) { + local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED; + local->op_ret = -1; + local->op_errno = afr_final_errno(local, priv); + if (local->op_errno == 0) + local->op_errno = afr_quorum_errno(priv); + afr_unlock_locks_and_proceed(frame, this, lock_count); + } else { + goto unwind; + } + + return 0; unwind: - afr_fop_lock_unwind (frame, local->op, local->op_ret, - local->op_errno, local->xdata_rsp); - return 0; + afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; } static int -afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_common_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - int child_index = (long)cookie; + afr_local_t *local = NULL; + int child_index = (long)cookie; - local = frame->local; + local = frame->local; - local->replies[child_index].valid = 1; - local->replies[child_index].op_ret = op_ret; - local->replies[child_index].op_errno = op_errno; - if (op_ret == 0 && xdata) { - local->replies[child_index].xdata = dict_ref (xdata); - LOCK (&frame->lock); - { - if (!local->xdata_rsp) - local->xdata_rsp = dict_ref (xdata); - } - UNLOCK (&frame->lock); + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + if (op_ret == 0 && xdata) { + local->replies[child_index].xdata = dict_ref(xdata); + LOCK(&frame->lock); + { + if (!local->xdata_rsp) + local->xdata_rsp = dict_ref(xdata); } - return 0; + UNLOCK(&frame->lock); + } + return 0; } static int32_t -afr_serialized_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_serialized_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t 
*priv = NULL; - int child_index = (long)cookie; - int next_child = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int child_index = (long)cookie; + int next_child = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); + afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata); - for (next_child = child_index + 1; next_child < priv->child_count; - next_child++) { - if (local->child_up[next_child]) - break; - } + for (next_child = child_index + 1; next_child < priv->child_count; + next_child++) { + if (local->child_up[next_child]) + break; + } - if (afr_is_conflicting_lock_present (op_ret, op_errno) || - (next_child == priv->child_count)) { - afr_fop_lock_done (frame, this); - } else { - afr_fop_lock_wind (frame, this, next_child, - afr_serialized_lock_cbk); - } + if (afr_is_conflicting_lock_present(op_ret, op_errno) || + (next_child == priv->child_count)) { + afr_fop_lock_done(frame, this); + } else { + afr_fop_lock_wind(frame, this, next_child, afr_serialized_lock_cbk); + } - return 0; + return 0; } static int -afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this) +afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - afr_fop_lock_wind (frame, this, i, - afr_serialized_lock_cbk); - break; - } + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + afr_fop_lock_wind(frame, this, i, afr_serialized_lock_cbk); + break; } - return 0; + } + return 0; } static int32_t -afr_parallel_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_parallel_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int call_count = 0; + int call_count = 0; - afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); + afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata); - call_count = afr_frame_return (frame); - if (call_count == 0) - afr_fop_lock_done (frame, this); + call_count = afr_frame_return(frame); + if (call_count == 0) + afr_fop_lock_done(frame, this); - return 0; + return 0; } static int -afr_parallel_lock_wind (call_frame_t *frame, xlator_t *this) +afr_parallel_lock_wind(call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int call_count = 0; - int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int call_count = 0; + int i = 0; - priv = this->private; - local = frame->local; - call_count = local->call_count; + priv = this->private; + local = frame->local; + call_count = local->call_count; - for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i]) - continue; - afr_fop_lock_wind (frame, this, i, afr_parallel_lock_cbk); - if (!--call_count) - break; - } - return 0; + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + afr_fop_lock_wind(frame, this, i, afr_parallel_lock_cbk); + if (!--call_count) + break; + } + return 0; } static int -afr_fop_handle_lock (call_frame_t *frame, xlator_t *this) +afr_fop_handle_lock(call_frame_t *frame, xlator_t *this) { - afr_local_t *local 
= frame->local; - int op_errno = 0; + afr_local_t *local = frame->local; + int op_errno = 0; - if (!afr_fop_lock_is_unlock (frame)) { - if (!afr_is_consistent_io_possible (local, this->private, - &op_errno)) - goto out; + if (!afr_fop_lock_is_unlock(frame)) { + if (!afr_is_consistent_io_possible(local, this->private, &op_errno)) + goto out; - switch (local->op) { - case GF_FOP_INODELK: - case GF_FOP_FINODELK: - local->cont.inodelk.cmd = F_SETLK; - break; - case GF_FOP_ENTRYLK: - case GF_FOP_FENTRYLK: - local->cont.entrylk.cmd = ENTRYLK_LOCK_NB; - break; - default: - break; - } + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.cmd = F_SETLK; + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.cmd = ENTRYLK_LOCK_NB; + break; + default: + break; } + } - if (local->xdata_req) { - switch (local->op) { - case GF_FOP_INODELK: - case GF_FOP_FINODELK: - local->cont.inodelk.xdata = dict_ref (local->xdata_req); - break; - case GF_FOP_ENTRYLK: - case GF_FOP_FENTRYLK: - local->cont.entrylk.xdata = dict_ref (local->xdata_req); - break; - default: - break; - } + if (local->xdata_req) { + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.xdata = dict_ref(local->xdata_req); + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.xdata = dict_ref(local->xdata_req); + break; + default: + break; } + } - local->fop_lock_state = AFR_FOP_LOCK_PARALLEL; - afr_parallel_lock_wind (frame, this); + local->fop_lock_state = AFR_FOP_LOCK_PARALLEL; + afr_parallel_lock_wind(frame, this); out: - return -op_errno; + return -op_errno; } static int32_t -afr_handle_inodelk (call_frame_t *frame, glusterfs_fop_t fop, - const char *volume, loc_t *loc, fd_t *fd, int32_t cmd, - struct gf_flock *flock, dict_t *xdata) +afr_handle_inodelk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume, + loc_t *loc, fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) { - afr_local_t *local = NULL; - int32_t op_errno = ENOMEM; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = fop; - if (loc) - loc_copy (&local->loc, loc); - if (fd) - local->fd = fd_ref (fd); - - local->cont.inodelk.volume = gf_strdup (volume); - if (!local->cont.inodelk.volume) { - op_errno = ENOMEM; - goto out; - } - - local->cont.inodelk.in_cmd = cmd; - local->cont.inodelk.cmd = cmd; - local->cont.inodelk.in_flock = *flock; - local->cont.inodelk.flock = *flock; - if (xdata) - local->xdata_req = dict_ref (xdata); - - op_errno = -afr_fop_handle_lock (frame, frame->this); - if (op_errno) - goto out; - return 0; + afr_local_t *local = NULL; + int32_t op_errno = ENOMEM; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->op = fop; + if (loc) + loc_copy(&local->loc, loc); + if (fd) + local->fd = fd_ref(fd); + + local->cont.inodelk.volume = gf_strdup(volume); + if (!local->cont.inodelk.volume) { + op_errno = ENOMEM; + goto out; + } + + local->cont.inodelk.in_cmd = cmd; + local->cont.inodelk.cmd = cmd; + local->cont.inodelk.in_flock = *flock; + local->cont.inodelk.flock = *flock; + if (xdata) + local->xdata_req = dict_ref(xdata); + + op_errno = -afr_fop_handle_lock(frame, frame->this); + if (op_errno) + goto out; + return 0; out: - afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL); + afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL); - return 0; + return 0; } int32_t -afr_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, - 
struct gf_flock *flock, dict_t *xdata) +afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - afr_handle_inodelk (frame, GF_FOP_INODELK, volume, loc, NULL, cmd, - flock, xdata); - return 0; + afr_handle_inodelk(frame, GF_FOP_INODELK, volume, loc, NULL, cmd, flock, + xdata); + return 0; } int32_t -afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - int32_t cmd, struct gf_flock *flock, dict_t *xdata) +afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - afr_handle_inodelk (frame, GF_FOP_FINODELK, volume, NULL, fd, cmd, - flock, xdata); - return 0; + afr_handle_inodelk(frame, GF_FOP_FINODELK, volume, NULL, fd, cmd, flock, + xdata); + return 0; } static int -afr_handle_entrylk (call_frame_t *frame, glusterfs_fop_t fop, - const char *volume, loc_t *loc, fd_t *fd, - const char *basename, entrylk_cmd cmd, - entrylk_type type, dict_t *xdata) -{ - afr_local_t *local = NULL; - int32_t op_errno = ENOMEM; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = fop; - if (loc) - loc_copy (&local->loc, loc); - if (fd) - local->fd = fd_ref (fd); - local->cont.entrylk.cmd = cmd; - local->cont.entrylk.in_cmd = cmd; - local->cont.entrylk.type = type; - local->cont.entrylk.volume = gf_strdup (volume); - local->cont.entrylk.basename = gf_strdup (basename); - if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) { - op_errno = ENOMEM; - goto out; - } - if (xdata) - local->xdata_req = dict_ref (xdata); - op_errno = -afr_fop_handle_lock (frame, frame->this); - if (op_errno) - goto out; - - return 0; +afr_handle_entrylk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume, + loc_t *loc, fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) +{ + afr_local_t *local = NULL; + int32_t op_errno = ENOMEM; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->op = fop; + if (loc) + loc_copy(&local->loc, loc); + if (fd) + local->fd = fd_ref(fd); + local->cont.entrylk.cmd = cmd; + local->cont.entrylk.in_cmd = cmd; + local->cont.entrylk.type = type; + local->cont.entrylk.volume = gf_strdup(volume); + local->cont.entrylk.basename = gf_strdup(basename); + if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) { + op_errno = ENOMEM; + goto out; + } + if (xdata) + local->xdata_req = dict_ref(xdata); + op_errno = -afr_fop_handle_lock(frame, frame->this); + if (op_errno) + goto out; + + return 0; out: - afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL); - return 0; + afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL); + return 0; } int -afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, const char *basename, entrylk_cmd cmd, - entrylk_type type, dict_t *xdata) +afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { - afr_handle_entrylk (frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename, - cmd, type, xdata); - return 0; + afr_handle_entrylk(frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename, cmd, + type, xdata); + return 0; } int -afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - const char *basename, entrylk_cmd cmd, entrylk_type type, - dict_t *xdata) +afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const 
char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { - afr_handle_entrylk (frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename, - cmd, type, xdata); - return 0; + afr_handle_entrylk(frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename, cmd, + type, xdata); + return 0; } - int -afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct statvfs *statvfs, dict_t *xdata) +afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct statvfs *statvfs, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = 0; - struct statvfs *buf = NULL; + afr_local_t *local = NULL; + int call_count = 0; + struct statvfs *buf = NULL; - LOCK (&frame->lock); - { - local = frame->local; - - if (op_ret != 0) { - local->op_errno = op_errno; - goto unlock; - } - - local->op_ret = op_ret; - - buf = &local->cont.statfs.buf; - if (local->cont.statfs.buf_set) { - if (statvfs->f_bavail < buf->f_bavail) { - *buf = *statvfs; - if (xdata) { - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); - local->xdata_rsp = dict_ref (xdata); - } - } - } else { - *buf = *statvfs; - local->cont.statfs.buf_set = 1; - if (xdata) - local->xdata_rsp = dict_ref (xdata); - } + LOCK(&frame->lock); + { + local = frame->local; + + if (op_ret != 0) { + local->op_errno = op_errno; + goto unlock; + } + + local->op_ret = op_ret; + + buf = &local->cont.statfs.buf; + if (local->cont.statfs.buf_set) { + if (statvfs->f_bavail < buf->f_bavail) { + *buf = *statvfs; + if (xdata) { + if (local->xdata_rsp) + dict_unref(local->xdata_rsp); + local->xdata_rsp = dict_ref(xdata); + } + } + } else { + *buf = *statvfs; + local->cont.statfs.buf_set = 1; + if (xdata) + local->xdata_rsp = dict_ref(xdata); } + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - call_count = afr_frame_return (frame); + call_count = afr_frame_return(frame); - if (call_count == 0) - AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno, - &local->cont.statfs.buf, local->xdata_rsp); + if (call_count == 0) + AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, + &local->cont.statfs.buf, local->xdata_rsp); - return 0; + return 0; } - int -afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - afr_local_t * local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = 0; - int32_t op_errno = ENOMEM; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_STATFS; - if (!afr_is_consistent_io_possible (local, priv, &op_errno)) - goto out; - - if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX]) - local->call_count--; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - if (AFR_IS_ARBITER_BRICK(priv, i)) - continue; - STACK_WIND (frame, afr_statfs_cbk, - priv->children[i], - priv->children[i]->fops->statfs, - loc, xdata); - if (!--call_count) - break; - } +afr_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; + int32_t op_errno = ENOMEM; + + priv = this->private; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->op = GF_FOP_STATFS; + if (!afr_is_consistent_io_possible(local, priv, &op_errno)) + goto out; + + if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX]) 
+ local->call_count--; + call_count = local->call_count; + if (!call_count) { + op_errno = ENOTCONN; + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + if (AFR_IS_ARBITER_BRICK(priv, i)) + continue; + STACK_WIND(frame, afr_statfs_cbk, priv->children[i], + priv->children[i]->fops->statfs, loc, xdata); + if (!--call_count) + break; } + } - return 0; + return 0; out: - AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); + AFR_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - int32_t -afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock, - dict_t *xdata) +afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = this->private; - int call_count = -1; - int child_index = (long)cookie; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + int call_count = -1; + int child_index = (long)cookie; - local = frame->local; + local = frame->local; - if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - AFR_MSG_UNLOCK_FAIL, - "gfid=%s: unlock failed on subvolume %s " - "with lock owner %s", - uuid_utoa (local->fd->inode->gfid), - priv->children[child_index]->name, - lkowner_utoa (&frame->root->lk_owner)); - } + if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL, + "gfid=%s: unlock failed on subvolume %s " + "with lock owner %s", + uuid_utoa(local->fd->inode->gfid), + priv->children[child_index]->name, + lkowner_utoa(&frame->root->lk_owner)); + } - call_count = afr_frame_return (frame); - if (call_count == 0) - AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - NULL, local->xdata_rsp); + call_count = afr_frame_return(frame); + if (call_count == 0) + AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL, + local->xdata_rsp); - return 0; + return 0; } - int32_t -afr_lk_unlock (call_frame_t *frame, xlator_t *this) +afr_lk_unlock(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int i = 0; - int call_count = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes, - priv->child_count); + call_count = afr_locked_nodes_count(local->cont.lk.locked_nodes, + priv->child_count); - if (call_count == 0) { - AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - NULL, local->xdata_rsp); - return 0; - } + if (call_count == 0) { + AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL, + local->xdata_rsp); + return 0; + } - local->call_count = call_count; + local->call_count = call_count; - local->cont.lk.user_flock.l_type = F_UNLCK; + local->cont.lk.user_flock.l_type = F_UNLCK; - for (i = 0; i < priv->child_count; i++) { - if (local->cont.lk.locked_nodes[i]) { - STACK_WIND_COOKIE (frame, afr_lk_unlock_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->lk, - local->fd, F_SETLK, - &local->cont.lk.user_flock, NULL); - - if (!--call_count) - break; - } + for (i = 0; i < priv->child_count; i++) { + if (local->cont.lk.locked_nodes[i]) { + STACK_WIND_COOKIE(frame, 
afr_lk_unlock_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->lk, + local->fd, F_SETLK, &local->cont.lk.user_flock, + NULL); + + if (!--call_count) + break; } + } - return 0; + return 0; } - int32_t -afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock, - dict_t *xdata) +afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int child_index = -1; - - - local = frame->local; - priv = this->private; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int child_index = -1; - child_index = (long) cookie; + local = frame->local; + priv = this->private; - afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); - if (op_ret < 0 && op_errno == EAGAIN) { - local->op_ret = -1; - local->op_errno = EAGAIN; + child_index = (long)cookie; - afr_lk_unlock (frame, this); - return 0; - } + afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata); + if (op_ret < 0 && op_errno == EAGAIN) { + local->op_ret = -1; + local->op_errno = EAGAIN; - if (op_ret == 0) { - local->op_ret = 0; - local->op_errno = 0; - local->cont.lk.locked_nodes[child_index] = 1; - local->cont.lk.ret_flock = *lock; - } + afr_lk_unlock(frame, this); + return 0; + } + + if (op_ret == 0) { + local->op_ret = 0; + local->op_errno = 0; + local->cont.lk.locked_nodes[child_index] = 1; + local->cont.lk.ret_flock = *lock; + } + + child_index++; + + if (child_index < priv->child_count) { + STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)child_index, + priv->children[child_index], + priv->children[child_index]->fops->lk, local->fd, + local->cont.lk.cmd, &local->cont.lk.user_flock, + local->xdata_req); + } else if (priv->quorum_count && + !afr_has_quorum(local->cont.lk.locked_nodes, this)) { + local->op_ret = -1; + local->op_errno = afr_final_errno(local, priv); - child_index++; - - if (child_index < priv->child_count) { - STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->lk, - local->fd, local->cont.lk.cmd, - &local->cont.lk.user_flock, - local->xdata_req); - } else if (priv->quorum_count && - !afr_has_quorum (local->cont.lk.locked_nodes, this)) { - local->op_ret = -1; - local->op_errno = afr_final_errno (local, priv); - - afr_lk_unlock (frame, this); - } else { - if (local->op_ret < 0) - local->op_errno = afr_final_errno (local, priv); + afr_lk_unlock(frame, this); + } else { + if (local->op_ret < 0) + local->op_errno = afr_final_errno(local, priv); - AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - &local->cont.lk.ret_flock, local->xdata_rsp); - } + AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, + &local->cont.lk.ret_flock, local->xdata_rsp); + } - return 0; + return 0; } int -afr_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int32_t op_errno = ENOMEM; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_LK; - if (!afr_lk_is_unlock (cmd, flock) && - !afr_is_consistent_io_possible (local, priv, &op_errno)) - goto out; - - local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count, - sizeof (*local->cont.lk.locked_nodes), - gf_afr_mt_char); - - if (!local->cont.lk.locked_nodes) { - 
op_errno = ENOMEM; - goto out; - } - - local->fd = fd_ref (fd); - local->cont.lk.cmd = cmd; - local->cont.lk.user_flock = *flock; - local->cont.lk.ret_flock = *flock; - if (xdata) - local->xdata_req = dict_ref (xdata); - - STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0, - priv->children[i], - priv->children[i]->fops->lk, - fd, cmd, flock, local->xdata_req); - - return 0; +afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; + int32_t op_errno = ENOMEM; + + priv = this->private; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->op = GF_FOP_LK; + if (!afr_lk_is_unlock(cmd, flock) && + !afr_is_consistent_io_possible(local, priv, &op_errno)) + goto out; + + local->cont.lk.locked_nodes = GF_CALLOC( + priv->child_count, sizeof(*local->cont.lk.locked_nodes), + gf_afr_mt_char); + + if (!local->cont.lk.locked_nodes) { + op_errno = ENOMEM; + goto out; + } + + local->fd = fd_ref(fd); + local->cont.lk.cmd = cmd; + local->cont.lk.user_flock = *flock; + local->cont.lk.ret_flock = *flock; + if (xdata) + local->xdata_req = dict_ref(xdata); + + STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i], + priv->children[i]->fops->lk, fd, cmd, flock, + local->xdata_req); + + return 0; out: - AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL); + AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int32_t -afr_lease_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_lease *lease, - dict_t *xdata) +afr_lease_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_lease *lease, + dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; + afr_local_t *local = NULL; + int call_count = -1; - local = frame->local; - call_count = afr_frame_return (frame); + local = frame->local; + call_count = afr_frame_return(frame); - if (call_count == 0) - AFR_STACK_UNWIND (lease, frame, local->op_ret, local->op_errno, - lease, xdata); + if (call_count == 0) + AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno, lease, + xdata); - return 0; + return 0; } int32_t -afr_lease_unlock (call_frame_t *frame, xlator_t *this) +afr_lease_unlock(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - call_count = afr_locked_nodes_count (local->cont.lease.locked_nodes, - priv->child_count); + call_count = afr_locked_nodes_count(local->cont.lease.locked_nodes, + priv->child_count); - if (call_count == 0) { - AFR_STACK_UNWIND (lease, frame, local->op_ret, local->op_errno, - &local->cont.lease.ret_lease, NULL); - return 0; - } + if (call_count == 0) { + AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno, + &local->cont.lease.ret_lease, NULL); + return 0; + } - local->call_count = call_count; + local->call_count = call_count; - local->cont.lease.user_lease.cmd = GF_UNLK_LEASE; + local->cont.lease.user_lease.cmd = GF_UNLK_LEASE; - for (i = 0; i < priv->child_count; i++) { - if (local->cont.lease.locked_nodes[i]) { - STACK_WIND (frame, afr_lease_unlock_cbk, - priv->children[i], - priv->children[i]->fops->lease, - &local->loc, 
&local->cont.lease.user_lease, NULL); - - if (!--call_count) - break; - } + for (i = 0; i < priv->child_count; i++) { + if (local->cont.lease.locked_nodes[i]) { + STACK_WIND(frame, afr_lease_unlock_cbk, priv->children[i], + priv->children[i]->fops->lease, &local->loc, + &local->cont.lease.user_lease, NULL); + + if (!--call_count) + break; } + } - return 0; + return 0; } int32_t -afr_lease_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_lease *lease, - dict_t *xdata) +afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_lease *lease, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int child_index = -1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int child_index = -1; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - child_index = (long) cookie; + child_index = (long)cookie; - afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); - if (op_ret < 0 && op_errno == EAGAIN) { - local->op_ret = -1; - local->op_errno = EAGAIN; + afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata); + if (op_ret < 0 && op_errno == EAGAIN) { + local->op_ret = -1; + local->op_errno = EAGAIN; - afr_lease_unlock (frame, this); - return 0; - } + afr_lease_unlock(frame, this); + return 0; + } - if (op_ret == 0) { - local->op_ret = 0; - local->op_errno = 0; - local->cont.lease.locked_nodes[child_index] = 1; - local->cont.lease.ret_lease = *lease; - } + if (op_ret == 0) { + local->op_ret = 0; + local->op_errno = 0; + local->cont.lease.locked_nodes[child_index] = 1; + local->cont.lease.ret_lease = *lease; + } + + child_index++; + if (child_index < priv->child_count) { + STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)child_index, + priv->children[child_index], + priv->children[child_index]->fops->lease, &local->loc, + &local->cont.lease.user_lease, xdata); + } else if (priv->quorum_count && + !afr_has_quorum(local->cont.lk.locked_nodes, this)) { + local->op_ret = -1; + local->op_errno = afr_final_errno(local, priv); - child_index++; - if (child_index < priv->child_count) { - STACK_WIND_COOKIE (frame, afr_lease_cbk, (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->lease, - &local->loc, &local->cont.lease.user_lease, xdata); - } else if (priv->quorum_count && - !afr_has_quorum (local->cont.lk.locked_nodes, this)) { - local->op_ret = -1; - local->op_errno = afr_final_errno (local, priv); - - afr_lease_unlock (frame, this); - } else { - if (local->op_ret < 0) - local->op_errno = afr_final_errno (local, priv); - AFR_STACK_UNWIND (lease, frame, local->op_ret, local->op_errno, - &local->cont.lease.ret_lease, NULL); - } + afr_lease_unlock(frame, this); + } else { + if (local->op_ret < 0) + local->op_errno = afr_final_errno(local, priv); + AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno, + &local->cont.lease.ret_lease, NULL); + } - return 0; + return 0; } int -afr_lease (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct gf_lease *lease, dict_t *xdata) +afr_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct gf_lease *lease, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int32_t op_errno = ENOMEM; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int32_t op_errno = ENOMEM; - priv = this->private; + priv = this->private; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) 
- goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_LEASE; - local->cont.lease.locked_nodes = GF_CALLOC (priv->child_count, - sizeof (*local->cont.lease.locked_nodes), - gf_afr_mt_char); + local->op = GF_FOP_LEASE; + local->cont.lease.locked_nodes = GF_CALLOC( + priv->child_count, sizeof(*local->cont.lease.locked_nodes), + gf_afr_mt_char); - if (!local->cont.lease.locked_nodes) { - op_errno = ENOMEM; - goto out; - } + if (!local->cont.lease.locked_nodes) { + op_errno = ENOMEM; + goto out; + } - loc_copy (&local->loc, loc); - local->cont.lease.user_lease = *lease; - local->cont.lease.ret_lease = *lease; + loc_copy(&local->loc, loc); + local->cont.lease.user_lease = *lease; + local->cont.lease.ret_lease = *lease; - STACK_WIND_COOKIE (frame, afr_lease_cbk, (void *) (long) 0, - priv->children[0], - priv->children[0]->fops->lease, - loc, lease, xdata); + STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)0, priv->children[0], + priv->children[0]->fops->lease, loc, lease, xdata); - return 0; + return 0; out: - AFR_STACK_UNWIND (lease, frame, -1, op_errno, NULL, NULL); + AFR_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int -afr_ipc_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - afr_local_t *local = NULL; - int child_index = (long)cookie; - int call_count = 0; - gf_boolean_t failed = _gf_false; - gf_boolean_t succeeded = _gf_false; - int i = 0; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; - - local->replies[child_index].valid = 1; - local->replies[child_index].op_ret = op_ret; - local->replies[child_index].op_errno = op_errno; - if (xdata) - local->replies[child_index].xdata = dict_ref (xdata); - - call_count = afr_frame_return (frame); - if (call_count) - goto out; - /* If any of the subvolumes failed with other than ENOTCONN - * return error else return success unless all the subvolumes - * failed. 
- * TODO: In case of failure, we need to unregister the xattrs - * from the other subvolumes where it succeeded (once upcall - * fixes the Bz-1371622)*/ - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret < 0 && - local->replies[i].op_errno != ENOTCONN) { - local->op_ret = local->replies[i].op_ret; - local->op_errno = local->replies[i].op_errno; - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); - local->xdata_rsp = NULL; - if (local->replies[i].xdata) { - local->xdata_rsp = - dict_ref (local->replies[i].xdata); - } - failed = _gf_true; - break; - } - if (local->replies[i].op_ret == 0) { - succeeded = _gf_true; - local->op_ret = 0; - local->op_errno = 0; - if (!local->xdata_rsp && local->replies[i].xdata) { - local->xdata_rsp = - dict_ref (local->replies[i].xdata); - } - } +afr_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + afr_local_t *local = NULL; + int child_index = (long)cookie; + int call_count = 0; + gf_boolean_t failed = _gf_false; + gf_boolean_t succeeded = _gf_false; + int i = 0; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + if (xdata) + local->replies[child_index].xdata = dict_ref(xdata); + + call_count = afr_frame_return(frame); + if (call_count) + goto out; + /* If any of the subvolumes failed with other than ENOTCONN + * return error else return success unless all the subvolumes + * failed. + * TODO: In case of failure, we need to unregister the xattrs + * from the other subvolumes where it succeeded (once upcall + * fixes the Bz-1371622)*/ + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno != ENOTCONN) { + local->op_ret = local->replies[i].op_ret; + local->op_errno = local->replies[i].op_errno; + if (local->xdata_rsp) + dict_unref(local->xdata_rsp); + local->xdata_rsp = NULL; + if (local->replies[i].xdata) { + local->xdata_rsp = dict_ref(local->replies[i].xdata); + } + failed = _gf_true; + break; + } + if (local->replies[i].op_ret == 0) { + succeeded = _gf_true; + local->op_ret = 0; + local->op_errno = 0; + if (!local->xdata_rsp && local->replies[i].xdata) { + local->xdata_rsp = dict_ref(local->replies[i].xdata); + } } + } - if (!succeeded && !failed) { - local->op_ret = -1; - local->op_errno = ENOTCONN; - } + if (!succeeded && !failed) { + local->op_ret = -1; + local->op_errno = ENOTCONN; + } - AFR_STACK_UNWIND (ipc, frame, local->op_ret, local->op_errno, - local->xdata_rsp); + AFR_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno, + local->xdata_rsp); out: - return 0; + return 0; } int -afr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +afr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { - afr_local_t *local = NULL; - int32_t op_errno = -1; - afr_private_t *priv = NULL; - int i = 0; - int call_cnt = -1; + afr_local_t *local = NULL; + int32_t op_errno = -1; + afr_private_t *priv = NULL; + int i = 0; + int call_cnt = -1; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); - if (op != GF_IPC_TARGET_UPCALL) - goto wind_default; + if (op != GF_IPC_TARGET_UPCALL) + goto wind_default; - VALIDATE_OR_GOTO (this->private, err); - priv = 
this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto err; + VALIDATE_OR_GOTO(this->private, err); + priv = this->private; - call_cnt = local->call_count; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto err; - if (xdata) { - for (i = 0; i < priv->child_count; i++) { - if (dict_set_int8 (xdata, priv->pending_key[i], 0) < 0) - goto err; - } - } + call_cnt = local->call_count; + if (xdata) { for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i]) - continue; - - STACK_WIND_COOKIE (frame, afr_ipc_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->ipc, - op, xdata); - if (!--call_cnt) - break; + if (dict_set_int8(xdata, priv->pending_key[i], 0) < 0) + goto err; } - return 0; + } + + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + STACK_WIND_COOKIE(frame, afr_ipc_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->ipc, op, + xdata); + if (!--call_cnt) + break; + } + return 0; err: - if (op_errno == -1) - op_errno = errno; - AFR_STACK_UNWIND (ipc, frame, -1, op_errno, NULL); + if (op_errno == -1) + op_errno = errno; + AFR_STACK_UNWIND(ipc, frame, -1, op_errno, NULL); - return 0; + return 0; wind_default: - STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ipc, op, xdata); - return 0; + STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + return 0; } int -afr_forget (xlator_t *this, inode_t *inode) +afr_forget(xlator_t *this, inode_t *inode) { - uint64_t ctx_int = 0; - afr_inode_ctx_t *ctx = NULL; - - afr_spb_choice_timeout_cancel (this, inode); - inode_ctx_del (inode, this, &ctx_int); - if (!ctx_int) - return 0; + uint64_t ctx_int = 0; + afr_inode_ctx_t *ctx = NULL; - ctx = (afr_inode_ctx_t *)ctx_int; - afr_inode_ctx_destroy (ctx); + afr_spb_choice_timeout_cancel(this, inode); + inode_ctx_del(inode, this, &ctx_int); + if (!ctx_int) return 0; + + ctx = (afr_inode_ctx_t *)ctx_int; + afr_inode_ctx_destroy(ctx); + return 0; } int -afr_priv_dump (xlator_t *this) -{ - afr_private_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; - int i = 0; - - - GF_ASSERT (this); - priv = this->private; - - GF_ASSERT (priv); - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); - gf_proc_dump_add_section(key_prefix); - gf_proc_dump_write("child_count", "%u", priv->child_count); - for (i = 0; i < priv->child_count; i++) { - sprintf (key, "child_up[%d]", i); - gf_proc_dump_write(key, "%d", priv->child_up[i]); - sprintf (key, "pending_key[%d]", i); - gf_proc_dump_write(key, "%s", priv->pending_key[i]); - sprintf (key, "pending_reads[%d]", i); - gf_proc_dump_write(key, "%"PRId64, GF_ATOMIC_GET(priv->pending_reads[i])); - sprintf (key, "child_latency[%d]", i); - gf_proc_dump_write(key, "%"PRId64, priv->child_latency[i]); - } - gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal); - gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal); - gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal); - gf_proc_dump_write("read_child", "%d", priv->read_child); - gf_proc_dump_write("wait_count", "%u", priv->wait_count); - gf_proc_dump_write("heal-wait-queue-length", "%d", - priv->heal_wait_qlen); - gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters); - gf_proc_dump_write("background-self-heal-count", "%d", - priv->background_self_heal_count); - gf_proc_dump_write("healers", "%d", priv->healers); - 
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); - if (priv->quorum_count == AFR_QUORUM_AUTO) { - gf_proc_dump_write ("quorum-type", "auto"); - } else if (priv->quorum_count == 0) { - gf_proc_dump_write ("quorum-type", "none"); - } else { - gf_proc_dump_write("quorum-type", "fixed"); - gf_proc_dump_write("quorum-count", "%d", priv->quorum_count); - } - gf_proc_dump_write("up", "%u", afr_has_quorum (priv->child_up, this)); - - return 0; +afr_priv_dump(xlator_t *this) +{ + afr_private_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; + + GF_ASSERT(this); + priv = this->private; + + GF_ASSERT(priv); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section(key_prefix); + gf_proc_dump_write("child_count", "%u", priv->child_count); + for (i = 0; i < priv->child_count; i++) { + sprintf(key, "child_up[%d]", i); + gf_proc_dump_write(key, "%d", priv->child_up[i]); + sprintf(key, "pending_key[%d]", i); + gf_proc_dump_write(key, "%s", priv->pending_key[i]); + sprintf(key, "pending_reads[%d]", i); + gf_proc_dump_write(key, "%" PRId64, + GF_ATOMIC_GET(priv->pending_reads[i])); + sprintf(key, "child_latency[%d]", i); + gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]); + } + gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal); + gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal); + gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal); + gf_proc_dump_write("read_child", "%d", priv->read_child); + gf_proc_dump_write("wait_count", "%u", priv->wait_count); + gf_proc_dump_write("heal-wait-queue-length", "%d", priv->heal_wait_qlen); + gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters); + gf_proc_dump_write("background-self-heal-count", "%d", + priv->background_self_heal_count); + gf_proc_dump_write("healers", "%d", priv->healers); + gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + if (priv->quorum_count == AFR_QUORUM_AUTO) { + gf_proc_dump_write("quorum-type", "auto"); + } else if (priv->quorum_count == 0) { + gf_proc_dump_write("quorum-type", "none"); + } else { + gf_proc_dump_write("quorum-type", "fixed"); + gf_proc_dump_write("quorum-count", "%d", priv->quorum_count); + } + gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this)); + + return 0; } - /** * find_child_index - find the child's index in the array of subvolumes * @this: AFR @@ -5025,1974 +4904,1924 @@ afr_priv_dump (xlator_t *this) */ static int -find_child_index (xlator_t *this, xlator_t *child) +find_child_index(xlator_t *this, xlator_t *child) { - afr_private_t *priv = NULL; - int i = -1; + afr_private_t *priv = NULL; + int i = -1; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if ((xlator_t *) child == priv->children[i]) - break; - } + for (i = 0; i < priv->child_count; i++) { + if ((xlator_t *)child == priv->children[i]) + break; + } - return i; + return i; } static int -__afr_get_up_children_count (afr_private_t *priv) +__afr_get_up_children_count(afr_private_t *priv) { - int up_children = 0; - int i = 0; + int up_children = 0; + int i = 0; - for (i = 0; i < priv->child_count; i++) - if (priv->child_up[i] == 1) - up_children++; + for (i = 0; i < priv->child_count; i++) + if (priv->child_up[i] == 1) + up_children++; - return up_children; + return up_children; } glusterfs_event_t -__afr_transform_event_from_state (afr_private_t *priv) +__afr_transform_event_from_state(afr_private_t *priv) { - 
int i = 0; - int up_children = 0; + int i = 0; + int up_children = 0; - if (AFR_COUNT (priv->last_event, priv->child_count) == - priv->child_count) - /* have_heard_from_all. Let afr_notify() do the propagation. */ - return GF_EVENT_MAXVAL; + if (AFR_COUNT(priv->last_event, priv->child_count) == priv->child_count) + /* have_heard_from_all. Let afr_notify() do the propagation. */ + return GF_EVENT_MAXVAL; - up_children = __afr_get_up_children_count (priv); - /* Treat the children with pending notification, as having sent a - * GF_EVENT_CHILD_DOWN. i.e. set the event as GF_EVENT_SOME_DESCENDENT_DOWN, - * as done in afr_notify() */ - for (i = 0; i < priv->child_count; i++) { - if (priv->last_event[i]) - continue; - priv->last_event[i] = GF_EVENT_SOME_DESCENDENT_DOWN; - priv->child_up[i] = 0; - } + up_children = __afr_get_up_children_count(priv); + /* Treat the children with pending notification, as having sent a + * GF_EVENT_CHILD_DOWN. i.e. set the event as GF_EVENT_SOME_DESCENDENT_DOWN, + * as done in afr_notify() */ + for (i = 0; i < priv->child_count; i++) { + if (priv->last_event[i]) + continue; + priv->last_event[i] = GF_EVENT_SOME_DESCENDENT_DOWN; + priv->child_up[i] = 0; + } - if (up_children) - /* We received at least one child up */ - return GF_EVENT_CHILD_UP; - else - return GF_EVENT_CHILD_DOWN; + if (up_children) + /* We received at least one child up */ + return GF_EVENT_CHILD_UP; + else + return GF_EVENT_CHILD_DOWN; - return GF_EVENT_MAXVAL; + return GF_EVENT_MAXVAL; } static void -afr_notify_cbk (void *data) -{ - xlator_t *this = data; - afr_private_t *priv = this->private; - glusterfs_event_t event = GF_EVENT_MAXVAL; - gf_boolean_t propagate = _gf_false; - - LOCK (&priv->lock); - { - if (!priv->timer) { - /* - * Either child_up/child_down is already sent to parent. - * This is a spurious wake up. - */ - goto unlock; - } - priv->timer = NULL; - event = __afr_transform_event_from_state (priv); - if (event != GF_EVENT_MAXVAL) - propagate = _gf_true; - } +afr_notify_cbk(void *data) +{ + xlator_t *this = data; + afr_private_t *priv = this->private; + glusterfs_event_t event = GF_EVENT_MAXVAL; + gf_boolean_t propagate = _gf_false; + + LOCK(&priv->lock); + { + if (!priv->timer) { + /* + * Either child_up/child_down is already sent to parent. + * This is a spurious wake up. 
+ */ + goto unlock; + } + priv->timer = NULL; + event = __afr_transform_event_from_state(priv); + if (event != GF_EVENT_MAXVAL) + propagate = _gf_true; + } unlock: - UNLOCK (&priv->lock); - if (propagate) - default_notify (this, event, NULL); + UNLOCK(&priv->lock); + if (propagate) + default_notify(this, event, NULL); } static void -__afr_launch_notify_timer (xlator_t *this, afr_private_t *priv) +__afr_launch_notify_timer(xlator_t *this, afr_private_t *priv) { + struct timespec delay = { + 0, + }; - struct timespec delay = {0, }; - - gf_msg_debug (this->name, 0, "Initiating child-down timer"); - delay.tv_sec = 10; - delay.tv_nsec = 0; - priv->timer = gf_timer_call_after (this->ctx, delay, - afr_notify_cbk, this); - if (priv->timer == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_TIMER_CREATE_FAIL, - "Cannot create timer for delayed initialization"); - } + gf_msg_debug(this->name, 0, "Initiating child-down timer"); + delay.tv_sec = 10; + delay.tv_nsec = 0; + priv->timer = gf_timer_call_after(this->ctx, delay, afr_notify_cbk, this); + if (priv->timer == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_TIMER_CREATE_FAIL, + "Cannot create timer for delayed initialization"); + } } int -__get_heard_from_all_status (xlator_t *this) +__get_heard_from_all_status(xlator_t *this) { - afr_private_t *priv = this->private; - int heard_from_all = 1; - int i = 0; + afr_private_t *priv = this->private; + int heard_from_all = 1; + int i = 0; - for (i = 0; i < priv->child_count; i++) { - if (!priv->last_event[i]) { - heard_from_all = 0; - break; - } + for (i = 0; i < priv->child_count; i++) { + if (!priv->last_event[i]) { + heard_from_all = 0; + break; } - return heard_from_all; + } + return heard_from_all; } static int -find_best_down_child (xlator_t *this) +find_best_down_child(xlator_t *this) { - afr_private_t *priv = NULL; - int i = -1; - int32_t best_child = -1; - int64_t best_latency = INT64_MAX; + afr_private_t *priv = NULL; + int i = -1; + int32_t best_child = -1; + int64_t best_latency = INT64_MAX; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (priv->child_up[i] && - priv->child_latency[i] >= 0 && - priv->child_latency[i] < best_latency) { - best_child = i; - best_latency = priv->child_latency[i]; - } - } - if (best_child >= 0) { - gf_msg_debug (this->name, 0, "Found best down child (%d) " - "@ %ld ms latency", best_child, best_latency); + for (i = 0; i < priv->child_count; i++) { + if (priv->child_up[i] && priv->child_latency[i] >= 0 && + priv->child_latency[i] < best_latency) { + best_child = i; + best_latency = priv->child_latency[i]; } - return best_child; + } + if (best_child >= 0) { + gf_msg_debug(this->name, 0, + "Found best down child (%d) " + "@ %ld ms latency", + best_child, best_latency); + } + return best_child; } int -find_worst_up_child (xlator_t *this) +find_worst_up_child(xlator_t *this) { - afr_private_t *priv = NULL; - int i = -1; - int32_t worst_child = -1; - int64_t worst_latency = INT64_MIN; + afr_private_t *priv = NULL; + int i = -1; + int32_t worst_child = -1; + int64_t worst_latency = INT64_MIN; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (priv->child_up[i] && - priv->child_latency[i] >= 0 && - priv->child_latency[i] > worst_latency) { - worst_child = i; - worst_latency = priv->child_latency[i]; - } + for (i = 0; i < priv->child_count; i++) { + if (priv->child_up[i] && priv->child_latency[i] >= 0 && + priv->child_latency[i] > worst_latency) { + worst_child = 
i; + worst_latency = priv->child_latency[i]; } - if (worst_child >= 0) { - gf_msg_debug (this->name, 0, "Found worst up child (%d)" - " @ %ld ms latency", worst_child, worst_latency); - } - return worst_child; + } + if (worst_child >= 0) { + gf_msg_debug(this->name, 0, + "Found worst up child (%d)" + " @ %ld ms latency", + worst_child, worst_latency); + } + return worst_child; } void -__afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator, - const int idx, int64_t halo_max_latency_msec, int32_t *event, - int64_t child_latency_msec) +__afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx, + int64_t halo_max_latency_msec, int32_t *event, + int64_t child_latency_msec) { - afr_private_t *priv = NULL; - int up_children = 0; + afr_private_t *priv = NULL; + int up_children = 0; - priv = this->private; + priv = this->private; - priv->child_latency[idx] = child_latency_msec; - gf_msg_debug (child_xlator->name, 0, "Client ping @ %ld ms", - child_latency_msec); - if (priv->shd.iamshd) - return; - - up_children = __afr_get_up_children_count (priv); - - if (child_latency_msec > halo_max_latency_msec && - priv->child_up[idx] == 1 && - up_children > priv->halo_min_replicas) { - if ((up_children - 1) < - priv->halo_min_replicas) { - gf_log (child_xlator->name, GF_LOG_INFO, - "Overriding halo threshold, " - "min replicas: %d", - priv->halo_min_replicas); - } else { - gf_log (child_xlator->name, GF_LOG_INFO, - "Child latency (%ld ms) " - "exceeds halo threshold (%ld), " - "marking child down.", - child_latency_msec, - halo_max_latency_msec); - *event = GF_EVENT_CHILD_DOWN; - } - } else if (child_latency_msec < halo_max_latency_msec && - priv->child_up[idx] == 0) { - if (up_children < priv->halo_max_replicas) { - gf_log (child_xlator->name, GF_LOG_INFO, - "Child latency (%ld ms) " - "below halo threshold (%ld), " - "marking child up.", - child_latency_msec, - halo_max_latency_msec); - *event = GF_EVENT_CHILD_UP; - } else { - gf_log (child_xlator->name, GF_LOG_INFO, - "Not marking child %d up, " - "max replicas (%d) reached.", idx, - priv->halo_max_replicas); - } + priv->child_latency[idx] = child_latency_msec; + gf_msg_debug(child_xlator->name, 0, "Client ping @ %ld ms", + child_latency_msec); + if (priv->shd.iamshd) + return; + + up_children = __afr_get_up_children_count(priv); + + if (child_latency_msec > halo_max_latency_msec && + priv->child_up[idx] == 1 && up_children > priv->halo_min_replicas) { + if ((up_children - 1) < priv->halo_min_replicas) { + gf_log(child_xlator->name, GF_LOG_INFO, + "Overriding halo threshold, " + "min replicas: %d", + priv->halo_min_replicas); + } else { + gf_log(child_xlator->name, GF_LOG_INFO, + "Child latency (%ld ms) " + "exceeds halo threshold (%ld), " + "marking child down.", + child_latency_msec, halo_max_latency_msec); + *event = GF_EVENT_CHILD_DOWN; + } + } else if (child_latency_msec < halo_max_latency_msec && + priv->child_up[idx] == 0) { + if (up_children < priv->halo_max_replicas) { + gf_log(child_xlator->name, GF_LOG_INFO, + "Child latency (%ld ms) " + "below halo threshold (%ld), " + "marking child up.", + child_latency_msec, halo_max_latency_msec); + *event = GF_EVENT_CHILD_UP; + } else { + gf_log(child_xlator->name, GF_LOG_INFO, + "Not marking child %d up, " + "max replicas (%d) reached.", + idx, priv->halo_max_replicas); } + } } static int64_t -afr_get_halo_latency (xlator_t *this) +afr_get_halo_latency(xlator_t *this) { - afr_private_t *priv = NULL; - int64_t halo_max_latency_msec = 0; + afr_private_t *priv = NULL; + int64_t 
halo_max_latency_msec = 0; - priv = this->private; + priv = this->private; - if (priv->shd.iamshd) { - halo_max_latency_msec = priv->shd.halo_max_latency_msec; - } else if (priv->nfsd.iamnfsd) { - halo_max_latency_msec = - priv->nfsd.halo_max_latency_msec; - } else { - halo_max_latency_msec = priv->halo_max_latency_msec; - } - gf_msg_debug (this->name, 0, "Using halo latency %ld", - halo_max_latency_msec); - return halo_max_latency_msec; + if (priv->shd.iamshd) { + halo_max_latency_msec = priv->shd.halo_max_latency_msec; + } else if (priv->nfsd.iamnfsd) { + halo_max_latency_msec = priv->nfsd.halo_max_latency_msec; + } else { + halo_max_latency_msec = priv->halo_max_latency_msec; + } + gf_msg_debug(this->name, 0, "Using halo latency %ld", + halo_max_latency_msec); + return halo_max_latency_msec; } void -__afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator, - const int idx, int64_t child_latency_msec, - int32_t *event, int32_t *call_psh, int32_t *up_child) -{ - afr_private_t *priv = NULL; - int up_children = 0; - int worst_up_child = -1; - int64_t halo_max_latency_msec = afr_get_halo_latency (this); - - priv = this->private; - - /* - * This only really counts if the child was never up - * (value = -1) or had been down (value = 0). See - * comment at GF_EVENT_CHILD_DOWN for a more detailed - * explanation. - */ - if (priv->child_up[idx] != 1) { - priv->event_generation++; - } - priv->child_up[idx] = 1; - - *call_psh = 1; - *up_child = idx; - up_children = __afr_get_up_children_count (priv); - /* - * If this is an _actual_ CHILD_UP event, we - * want to set the child_latency to MAX to indicate - * the child needs ping data to be available before doing child-up - */ - if (child_latency_msec < 0 && priv->halo_enabled) { - /*set to INT64_MAX-1 so that it is found for best_down_child*/ - priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; - } - - /* - * Handle the edge case where we exceed - * halo_min_replicas and we've got a child which is - * marked up as it was helping to satisfy the - * halo_min_replicas even though it's latency exceeds - * halo_max_latency_msec. 
- */ - if (up_children > priv->halo_min_replicas) { - worst_up_child = find_worst_up_child (this); - if (worst_up_child >= 0 && - priv->child_latency[worst_up_child] > - halo_max_latency_msec) { - gf_msg_debug (this->name, 0, "Marking child %d down, " - "doesn't meet halo threshold (%ld), and > " - "halo_min_replicas (%d)", - worst_up_child, halo_max_latency_msec, - priv->halo_min_replicas); - priv->child_up[worst_up_child] = 0; - up_children--; - } - } - - if (up_children > priv->halo_max_replicas && - !priv->shd.iamshd) { - worst_up_child = find_worst_up_child (this); - if (worst_up_child < 0) { - worst_up_child = idx; - } - priv->child_up[worst_up_child] = 0; - up_children--; - gf_msg_debug (this->name, 0, "Marking child %d down, " - "up_children (%d) > halo_max_replicas (%d)", - worst_up_child, up_children, priv->halo_max_replicas); - } - - if (up_children == 1) { - gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP, - "Subvolume '%s' came back up; " - "going online.", - child_xlator->name); - gf_event (EVENT_AFR_SUBVOL_UP, "subvol=%s", this->name); - } else { - *event = GF_EVENT_SOME_DESCENDENT_UP; - } - - priv->last_event[idx] = *event; +__afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator, + const int idx, int64_t child_latency_msec, + int32_t *event, int32_t *call_psh, + int32_t *up_child) +{ + afr_private_t *priv = NULL; + int up_children = 0; + int worst_up_child = -1; + int64_t halo_max_latency_msec = afr_get_halo_latency(this); + + priv = this->private; + + /* + * This only really counts if the child was never up + * (value = -1) or had been down (value = 0). See + * comment at GF_EVENT_CHILD_DOWN for a more detailed + * explanation. + */ + if (priv->child_up[idx] != 1) { + priv->event_generation++; + } + priv->child_up[idx] = 1; + + *call_psh = 1; + *up_child = idx; + up_children = __afr_get_up_children_count(priv); + /* + * If this is an _actual_ CHILD_UP event, we + * want to set the child_latency to MAX to indicate + * the child needs ping data to be available before doing child-up + */ + if (child_latency_msec < 0 && priv->halo_enabled) { + /*set to INT64_MAX-1 so that it is found for best_down_child*/ + priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; + } + + /* + * Handle the edge case where we exceed + * halo_min_replicas and we've got a child which is + * marked up as it was helping to satisfy the + * halo_min_replicas even though it's latency exceeds + * halo_max_latency_msec. 
+ */ + if (up_children > priv->halo_min_replicas) { + worst_up_child = find_worst_up_child(this); + if (worst_up_child >= 0 && + priv->child_latency[worst_up_child] > halo_max_latency_msec) { + gf_msg_debug(this->name, 0, + "Marking child %d down, " + "doesn't meet halo threshold (%ld), and > " + "halo_min_replicas (%d)", + worst_up_child, halo_max_latency_msec, + priv->halo_min_replicas); + priv->child_up[worst_up_child] = 0; + up_children--; + } + } + + if (up_children > priv->halo_max_replicas && !priv->shd.iamshd) { + worst_up_child = find_worst_up_child(this); + if (worst_up_child < 0) { + worst_up_child = idx; + } + priv->child_up[worst_up_child] = 0; + up_children--; + gf_msg_debug(this->name, 0, + "Marking child %d down, " + "up_children (%d) > halo_max_replicas (%d)", + worst_up_child, up_children, priv->halo_max_replicas); + } + + if (up_children == 1) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP, + "Subvolume '%s' came back up; " + "going online.", + child_xlator->name); + gf_event(EVENT_AFR_SUBVOL_UP, "subvol=%s", this->name); + } else { + *event = GF_EVENT_SOME_DESCENDENT_UP; + } + + priv->last_event[idx] = *event; } void -__afr_handle_child_down_event (xlator_t *this, xlator_t *child_xlator, - int idx, int64_t child_latency_msec, int32_t *event, - int32_t *call_psh, int32_t *up_child) -{ - afr_private_t *priv = NULL; - int i = 0; - int up_children = 0; - int down_children = 0; - int best_down_child = -1; - - priv = this->private; - - /* - * If a brick is down when we start, we'll get a - * CHILD_DOWN to indicate its initial state. There - * was never a CHILD_UP in this case, so if we - * increment "down_count" the difference between than - * and "up_count" will no longer be the number of - * children that are currently up. This has serious - * implications e.g. for quorum enforcement, so we - * don't increment these values unless the event - * represents an actual state transition between "up" - * (value = 1) and anything else. - */ - if (priv->child_up[idx] == 1) { - priv->event_generation++; - } - - /* - * If this is an _actual_ CHILD_DOWN event, we - * want to set the child_latency to < 0 to indicate - * the child is really disconnected. - */ - if (child_latency_msec < 0) { - priv->child_latency[idx] = child_latency_msec; - } - priv->child_up[idx] = 0; - - up_children = __afr_get_up_children_count (priv); - /* - * Handle the edge case where we need to find the - * next best child (to mark up) as marking this child - * down would cause us to fall below halo_min_replicas. - * We will also force the SHD to heal this child _now_ - * as we want it to be up to date if we are going to - * begin using it synchronously. - */ - if (up_children < priv->halo_min_replicas) { - best_down_child = find_best_down_child (this); - if (best_down_child >= 0) { - gf_msg_debug (this->name, 0, - "Swapping out child %d for " - "child %d to satisfy halo_min_replicas (%d).", - idx, best_down_child, priv->halo_min_replicas); - priv->child_up[best_down_child] = 1; - *call_psh = 1; - *up_child = best_down_child; - } - } - - for (i = 0; i < priv->child_count; i++) - if (priv->child_up[i] == 0) - down_children++; - if (down_children == priv->child_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SUBVOLS_DOWN, - "All subvolumes are down. 
Going " - "offline until at least one of them " - "comes back up."); - gf_event (EVENT_AFR_SUBVOLS_DOWN, "subvol=%s", this->name); - } else { - *event = GF_EVENT_SOME_DESCENDENT_DOWN; - } - priv->last_event[idx] = *event; +__afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx, + int64_t child_latency_msec, int32_t *event, + int32_t *call_psh, int32_t *up_child) +{ + afr_private_t *priv = NULL; + int i = 0; + int up_children = 0; + int down_children = 0; + int best_down_child = -1; + + priv = this->private; + + /* + * If a brick is down when we start, we'll get a + * CHILD_DOWN to indicate its initial state. There + * was never a CHILD_UP in this case, so if we + * increment "down_count" the difference between than + * and "up_count" will no longer be the number of + * children that are currently up. This has serious + * implications e.g. for quorum enforcement, so we + * don't increment these values unless the event + * represents an actual state transition between "up" + * (value = 1) and anything else. + */ + if (priv->child_up[idx] == 1) { + priv->event_generation++; + } + + /* + * If this is an _actual_ CHILD_DOWN event, we + * want to set the child_latency to < 0 to indicate + * the child is really disconnected. + */ + if (child_latency_msec < 0) { + priv->child_latency[idx] = child_latency_msec; + } + priv->child_up[idx] = 0; + + up_children = __afr_get_up_children_count(priv); + /* + * Handle the edge case where we need to find the + * next best child (to mark up) as marking this child + * down would cause us to fall below halo_min_replicas. + * We will also force the SHD to heal this child _now_ + * as we want it to be up to date if we are going to + * begin using it synchronously. + */ + if (up_children < priv->halo_min_replicas) { + best_down_child = find_best_down_child(this); + if (best_down_child >= 0) { + gf_msg_debug(this->name, 0, + "Swapping out child %d for " + "child %d to satisfy halo_min_replicas (%d).", + idx, best_down_child, priv->halo_min_replicas); + priv->child_up[best_down_child] = 1; + *call_psh = 1; + *up_child = best_down_child; + } + } + + for (i = 0; i < priv->child_count; i++) + if (priv->child_up[i] == 0) + down_children++; + if (down_children == priv->child_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SUBVOLS_DOWN, + "All subvolumes are down. Going " + "offline until at least one of them " + "comes back up."); + gf_event(EVENT_AFR_SUBVOLS_DOWN, "subvol=%s", this->name); + } else { + *event = GF_EVENT_SOME_DESCENDENT_DOWN; + } + priv->last_event[idx] = *event; } int32_t -afr_notify (xlator_t *this, int32_t event, - void *data, void *data2) -{ - afr_private_t *priv = NULL; - xlator_t *child_xlator = NULL; - int i = -1; - int propagate = 0; - int had_heard_from_all = 0; - int have_heard_from_all = 0; - int idx = -1; - int ret = -1; - int call_psh = 0; - int up_child = -1; - dict_t *input = NULL; - dict_t *output = NULL; - gf_boolean_t had_quorum = _gf_false; - gf_boolean_t has_quorum = _gf_false; - struct gf_upcall *up_data = NULL; - struct gf_upcall_cache_invalidation *up_ci = NULL; - inode_table_t *itable = NULL; - inode_t *inode = NULL; - int64_t halo_max_latency_msec = 0; - int64_t child_latency_msec = -1; - - child_xlator = (xlator_t *)data; - - priv = this->private; - - if (!priv) - return 0; - - /* - * We need to reset this in case children come up in "staggered" - * fashion, so that we discover a late-arriving local subvolume. 
Note - * that we could end up issuing N lookups to the first subvolume, and - * O(N^2) overall, but N is small for AFR so it shouldn't be an issue. - */ - priv->did_discovery = _gf_false; +afr_notify(xlator_t *this, int32_t event, void *data, void *data2) +{ + afr_private_t *priv = NULL; + xlator_t *child_xlator = NULL; + int i = -1; + int propagate = 0; + int had_heard_from_all = 0; + int have_heard_from_all = 0; + int idx = -1; + int ret = -1; + int call_psh = 0; + int up_child = -1; + dict_t *input = NULL; + dict_t *output = NULL; + gf_boolean_t had_quorum = _gf_false; + gf_boolean_t has_quorum = _gf_false; + struct gf_upcall *up_data = NULL; + struct gf_upcall_cache_invalidation *up_ci = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + int64_t halo_max_latency_msec = 0; + int64_t child_latency_msec = -1; + + child_xlator = (xlator_t *)data; + + priv = this->private; + + if (!priv) + return 0; + /* + * We need to reset this in case children come up in "staggered" + * fashion, so that we discover a late-arriving local subvolume. Note + * that we could end up issuing N lookups to the first subvolume, and + * O(N^2) overall, but N is small for AFR so it shouldn't be an issue. + */ + priv->did_discovery = _gf_false; + + /* parent xlators don't need to know about every child_up, child_down + * because of afr ha. If all subvolumes go down, child_down has + * to be triggered. In that state when 1 subvolume comes up child_up + * needs to be triggered. dht optimizes revalidate lookup by sending + * it only to one of its subvolumes. When child up/down happens + * for afr's subvolumes dht should be notified by child_modified. The + * subsequent revalidate lookup happens on all the dht's subvolumes + * which triggers afr self-heals if any. + */ + idx = find_child_index(this, child_xlator); + if (idx < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP, + "Received child_up from invalid subvolume"); + goto out; + } + + had_quorum = priv->quorum_count && afr_has_quorum(priv->child_up, this); + if (priv->halo_enabled) { + halo_max_latency_msec = afr_get_halo_latency(this); - /* parent xlators don't need to know about every child_up, child_down - * because of afr ha. If all subvolumes go down, child_down has - * to be triggered. In that state when 1 subvolume comes up child_up - * needs to be triggered. dht optimizes revalidate lookup by sending - * it only to one of its subvolumes. When child up/down happens - * for afr's subvolumes dht should be notified by child_modified. The - * subsequent revalidate lookup happens on all the dht's subvolumes - * which triggers afr self-heals if any. 
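/*
 * Illustrative sketch (editor's addition, not part of the patch): a toy
 * version of the event collapsing described above. Parents only need
 * CHILD_DOWN when the last child goes away and CHILD_UP when the first one
 * returns; everything in between is a SOME_DESCENDENT_* hint so that dht can
 * re-validate. The enum and function names are hypothetical; afr_notify()
 * implements this inline with locking and event-generation tracking.
 */
enum toy_event {
    TOY_CHILD_UP,
    TOY_CHILD_DOWN,
    TOY_SOME_DESCENDENT_UP,
    TOY_SOME_DESCENDENT_DOWN,
};

/* child_up[] already reflects the state after the current transition;
 * child_went_up is non-zero for an up transition, zero for a down one. */
static enum toy_event
toy_collapse_event(const int *child_up, int child_count, int child_went_up)
{
    int up = 0;
    int i;

    for (i = 0; i < child_count; i++)
        if (child_up[i])
            up++;

    if (child_went_up)
        return (up == 1) ? TOY_CHILD_UP : TOY_SOME_DESCENDENT_UP;

    return (up == 0) ? TOY_CHILD_DOWN : TOY_SOME_DESCENDENT_DOWN;
}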
- */ - idx = find_child_index (this, child_xlator); - if (idx < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP, - "Received child_up from invalid subvolume"); - goto out; - } - - had_quorum = priv->quorum_count && afr_has_quorum (priv->child_up, - this); - if (priv->halo_enabled) { - halo_max_latency_msec = afr_get_halo_latency (this); - - if (event == GF_EVENT_CHILD_PING) { - /* Calculates the child latency and sets event - */ - child_latency_msec = (int64_t)(uintptr_t)data2; - LOCK (&priv->lock); - { - __afr_handle_ping_event (this, child_xlator, - idx, halo_max_latency_msec, &event, + if (event == GF_EVENT_CHILD_PING) { + /* Calculates the child latency and sets event + */ + child_latency_msec = (int64_t)(uintptr_t)data2; + LOCK(&priv->lock); + { + __afr_handle_ping_event(this, child_xlator, idx, + halo_max_latency_msec, &event, child_latency_msec); - } - UNLOCK (&priv->lock); - } + } + UNLOCK(&priv->lock); } + } - if (event == GF_EVENT_CHILD_PING) { - /* This is the only xlator that handles PING, no reason to - * propagate. - */ - goto out; - } + if (event == GF_EVENT_CHILD_PING) { + /* This is the only xlator that handles PING, no reason to + * propagate. + */ + goto out; + } - if (event == GF_EVENT_TRANSLATOR_OP) { - LOCK (&priv->lock); - { - had_heard_from_all = __get_heard_from_all_status (this); - } - UNLOCK (&priv->lock); - - if (!had_heard_from_all) { - ret = -1; - } else { - input = data; - output = data2; - ret = afr_xl_op (this, input, output); - } - goto out; + if (event == GF_EVENT_TRANSLATOR_OP) { + LOCK(&priv->lock); + { + had_heard_from_all = __get_heard_from_all_status(this); } + UNLOCK(&priv->lock); - LOCK (&priv->lock); - { - had_heard_from_all = __get_heard_from_all_status (this); - switch (event) { - case GF_EVENT_PARENT_UP: - __afr_launch_notify_timer (this, priv); - propagate = 1; - break; - case GF_EVENT_CHILD_UP: - __afr_handle_child_up_event (this, child_xlator, - idx, child_latency_msec, &event, &call_psh, - &up_child); - break; + if (!had_heard_from_all) { + ret = -1; + } else { + input = data; + output = data2; + ret = afr_xl_op(this, input, output); + } + goto out; + } + + LOCK(&priv->lock); + { + had_heard_from_all = __get_heard_from_all_status(this); + switch (event) { + case GF_EVENT_PARENT_UP: + __afr_launch_notify_timer(this, priv); + propagate = 1; + break; + case GF_EVENT_CHILD_UP: + __afr_handle_child_up_event(this, child_xlator, idx, + child_latency_msec, &event, + &call_psh, &up_child); + break; - case GF_EVENT_CHILD_DOWN: - __afr_handle_child_down_event (this, child_xlator, idx, - child_latency_msec, &event, &call_psh, - &up_child); - break; + case GF_EVENT_CHILD_DOWN: + __afr_handle_child_down_event(this, child_xlator, idx, + child_latency_msec, &event, + &call_psh, &up_child); + break; - case GF_EVENT_CHILD_CONNECTING: - priv->last_event[idx] = event; + case GF_EVENT_CHILD_CONNECTING: + priv->last_event[idx] = event; - break; + break; + + case GF_EVENT_SOME_DESCENDENT_DOWN: + priv->last_event[idx] = event; + break; + case GF_EVENT_UPCALL: + up_data = (struct gf_upcall *)data; + if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION) + break; + up_ci = (struct gf_upcall_cache_invalidation *)up_data->data; + + /* Since md-cache will be aggressively filtering + * lookups, the stale read issue will be more + * pronounced. 
Hence when a pending xattr is set notify + * all the md-cache clients to invalidate the existing + * stat cache and send the lookup next time */ + if (!up_ci->dict) + break; + for (i = 0; i < priv->child_count; i++) { + if (dict_get(up_ci->dict, priv->pending_key[i])) { + up_ci->flags |= UP_INVAL_ATTR; + itable = ((xlator_t *)this->graph->top)->itable; + /*Internal processes may not have itable for top + * xlator*/ + if (itable) + inode = inode_find(itable, up_data->gfid); + if (inode) + afr_inode_need_refresh_set(inode, this); - case GF_EVENT_SOME_DESCENDENT_DOWN: - priv->last_event[idx] = event; - break; - case GF_EVENT_UPCALL: - up_data = (struct gf_upcall *)data; - if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION) - break; - up_ci = (struct gf_upcall_cache_invalidation *)up_data->data; - - /* Since md-cache will be aggressively filtering - * lookups, the stale read issue will be more - * pronounced. Hence when a pending xattr is set notify - * all the md-cache clients to invalidate the existing - * stat cache and send the lookup next time */ - if (!up_ci->dict) - break; - for (i = 0; i < priv->child_count; i++) { - if (dict_get (up_ci->dict, priv->pending_key[i])) { - up_ci->flags |= UP_INVAL_ATTR; - itable = ((xlator_t *)this->graph->top)->itable; - /*Internal processes may not have itable for top xlator*/ - if (itable) - inode = inode_find (itable, up_data->gfid); - if (inode) - afr_inode_need_refresh_set (inode, this); - - break; - } - } - break; - default: - propagate = 1; break; + } } - have_heard_from_all = __get_heard_from_all_status (this); - if (!had_heard_from_all && have_heard_from_all) { - if (priv->timer) { - gf_timer_call_cancel (this->ctx, priv->timer); - priv->timer = NULL; - } - /* This is the first event which completes aggregation - of events from all subvolumes. If at least one subvol - had come up, propagate CHILD_UP, but only this time - */ - event = GF_EVENT_CHILD_DOWN; - for (i = 0; i < priv->child_count; i++) { - if (priv->last_event[i] == GF_EVENT_CHILD_UP) { - event = GF_EVENT_CHILD_UP; - break; - } - - if (priv->last_event[i] == - GF_EVENT_CHILD_CONNECTING) { - event = GF_EVENT_CHILD_CONNECTING; - /* continue to check other events for CHILD_UP */ - } - } - } + break; + default: + propagate = 1; + break; } - UNLOCK (&priv->lock); - - if (priv->quorum_count) { - has_quorum = afr_has_quorum (priv->child_up, this); - if (!had_quorum && has_quorum) { - gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET, - "Client-quorum is met"); - gf_event (EVENT_AFR_QUORUM_MET, - "subvol=%s", this->name); - } - if (had_quorum && !has_quorum) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_QUORUM_FAIL, - "Client-quorum is not met"); - gf_event (EVENT_AFR_QUORUM_FAIL, "subvol=%s", - this->name); + have_heard_from_all = __get_heard_from_all_status(this); + if (!had_heard_from_all && have_heard_from_all) { + if (priv->timer) { + gf_timer_call_cancel(this->ctx, priv->timer); + priv->timer = NULL; + } + /* This is the first event which completes aggregation + of events from all subvolumes. If at least one subvol + had come up, propagate CHILD_UP, but only this time + */ + event = GF_EVENT_CHILD_DOWN; + for (i = 0; i < priv->child_count; i++) { + if (priv->last_event[i] == GF_EVENT_CHILD_UP) { + event = GF_EVENT_CHILD_UP; + break; } - } - /* if all subvols have reported status, no need to hide anything - or wait for anything else. 
Just propagate blindly */ - if (have_heard_from_all) - propagate = 1; - - ret = 0; - if (propagate) - ret = default_notify (this, event, data); - - if ((!had_heard_from_all) || call_psh) { - /* Launch self-heal on all local subvolumes if: - * a) We have_heard_from_all for the first time - * b) Already heard from everyone, but we now got a child-up - * event. - */ - if (have_heard_from_all && priv->shd.iamshd) { - for (i = 0; i < priv->child_count; i++) - if (priv->child_up[i]) - afr_selfheal_childup (this, i); + if (priv->last_event[i] == GF_EVENT_CHILD_CONNECTING) { + event = GF_EVENT_CHILD_CONNECTING; + /* continue to check other events for CHILD_UP */ } + } } + } + UNLOCK(&priv->lock); + + if (priv->quorum_count) { + has_quorum = afr_has_quorum(priv->child_up, this); + if (!had_quorum && has_quorum) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET, + "Client-quorum is met"); + gf_event(EVENT_AFR_QUORUM_MET, "subvol=%s", this->name); + } + if (had_quorum && !has_quorum) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL, + "Client-quorum is not met"); + gf_event(EVENT_AFR_QUORUM_FAIL, "subvol=%s", this->name); + } + } + + /* if all subvols have reported status, no need to hide anything + or wait for anything else. Just propagate blindly */ + if (have_heard_from_all) + propagate = 1; + + ret = 0; + if (propagate) + ret = default_notify(this, event, data); + + if ((!had_heard_from_all) || call_psh) { + /* Launch self-heal on all local subvolumes if: + * a) We have_heard_from_all for the first time + * b) Already heard from everyone, but we now got a child-up + * event. + */ + if (have_heard_from_all && priv->shd.iamshd) { + for (i = 0; i < priv->child_count; i++) + if (priv->child_up[i]) + afr_selfheal_childup(this, i); + } + } out: - return ret; + return ret; } int -afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) +afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno) { - int __ret = -1; - local->op_ret = -1; - local->op_errno = EUCLEAN; + int __ret = -1; + local->op_ret = -1; + local->op_errno = EUCLEAN; - __ret = syncbarrier_init (&local->barrier); - if (__ret) { - if (op_errno) - *op_errno = __ret; - goto out; - } + __ret = syncbarrier_init(&local->barrier); + if (__ret) { + if (op_errno) + *op_errno = __ret; + goto out; + } - local->child_up = GF_CALLOC (priv->child_count, - sizeof (*local->child_up), - gf_afr_mt_char); - if (!local->child_up) { - if (op_errno) - *op_errno = ENOMEM; - goto out; - } + local->child_up = GF_CALLOC(priv->child_count, sizeof(*local->child_up), + gf_afr_mt_char); + if (!local->child_up) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } + + memcpy(local->child_up, priv->child_up, + sizeof(*local->child_up) * priv->child_count); + local->call_count = AFR_COUNT(local->child_up, priv->child_count); + if (local->call_count == 0) { + gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN, + "no subvolumes up"); + if (op_errno) + *op_errno = ENOTCONN; + goto out; + } - memcpy (local->child_up, priv->child_up, - sizeof (*local->child_up) * priv->child_count); - local->call_count = AFR_COUNT (local->child_up, priv->child_count); - if (local->call_count == 0) { - gf_msg (THIS->name, GF_LOG_INFO, 0, - AFR_MSG_SUBVOLS_DOWN, "no subvolumes up"); - if (op_errno) - *op_errno = ENOTCONN; - goto out; - } + local->event_generation = priv->event_generation; - local->event_generation = priv->event_generation; - - local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char), - gf_afr_mt_char); - 
if (!local->read_attempted) { - if (op_errno) - *op_errno = ENOMEM; - goto out; - } - - local->readable = GF_CALLOC (priv->child_count, sizeof (char), - gf_afr_mt_char); - if (!local->readable) { - if (op_errno) - *op_errno = ENOMEM; - goto out; - } - - local->readable2 = GF_CALLOC (priv->child_count, sizeof (char), + local->read_attempted = GF_CALLOC(priv->child_count, sizeof(char), gf_afr_mt_char); - if (!local->readable2) { - if (op_errno) - *op_errno = ENOMEM; - goto out; - } + if (!local->read_attempted) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } + + local->readable = GF_CALLOC(priv->child_count, sizeof(char), + gf_afr_mt_char); + if (!local->readable) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } + + local->readable2 = GF_CALLOC(priv->child_count, sizeof(char), + gf_afr_mt_char); + if (!local->readable2) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } - local->read_subvol = -1; + local->read_subvol = -1; - local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies), - gf_afr_mt_reply_t); - if (!local->replies) { - if (op_errno) - *op_errno = ENOMEM; - goto out; - } + local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies), + gf_afr_mt_reply_t); + if (!local->replies) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } - local->need_full_crawl = _gf_false; + local->need_full_crawl = _gf_false; - INIT_LIST_HEAD (&local->healer); - return 0; + INIT_LIST_HEAD(&local->healer); + return 0; out: - return -1; + return -1; } int -afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count) +afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count) { - int ret = -ENOMEM; + int ret = -ENOMEM; - lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes), - child_count, gf_afr_mt_char); - if (NULL == lk->locked_nodes) - goto out; + lk->locked_nodes = GF_CALLOC(sizeof(*lk->locked_nodes), child_count, + gf_afr_mt_char); + if (NULL == lk->locked_nodes) + goto out; - lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes), - child_count, gf_afr_mt_char); - if (NULL == lk->lower_locked_nodes) - goto out; + lk->lower_locked_nodes = GF_CALLOC(sizeof(*lk->lower_locked_nodes), + child_count, gf_afr_mt_char); + if (NULL == lk->lower_locked_nodes) + goto out; - lk->lock_op_ret = -1; - lk->lock_op_errno = EUCLEAN; + lk->lock_op_ret = -1; + lk->lock_op_errno = EUCLEAN; - ret = 0; + ret = 0; out: - return ret; + return ret; } void -afr_matrix_cleanup (int32_t **matrix, unsigned int m) +afr_matrix_cleanup(int32_t **matrix, unsigned int m) { - int i = 0; + int i = 0; - if (!matrix) - goto out; - for (i = 0; i < m; i++) { - GF_FREE (matrix[i]); - } + if (!matrix) + goto out; + for (i = 0; i < m; i++) { + GF_FREE(matrix[i]); + } - GF_FREE (matrix); + GF_FREE(matrix); out: - return; + return; } -int32_t** -afr_matrix_create (unsigned int m, unsigned int n) +int32_t ** +afr_matrix_create(unsigned int m, unsigned int n) { - int32_t **matrix = NULL; - int i = 0; + int32_t **matrix = NULL; + int i = 0; - matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t); - if (!matrix) - goto out; + matrix = GF_CALLOC(sizeof(*matrix), m, gf_afr_mt_int32_t); + if (!matrix) + goto out; - for (i = 0; i < m; i++) { - matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n, - gf_afr_mt_int32_t); - if (!matrix[i]) - goto out; - } - return matrix; + for (i = 0; i < m; i++) { + matrix[i] = GF_CALLOC(sizeof(*matrix[i]), n, gf_afr_mt_int32_t); + if (!matrix[i]) + goto out; + } + return matrix; out: - afr_matrix_cleanup (matrix, m); - return NULL; + 
afr_matrix_cleanup(matrix, m); + return NULL; } int -afr_transaction_local_init (afr_local_t *local, xlator_t *this) -{ - int ret = -ENOMEM; - afr_private_t *priv = NULL; - - priv = this->private; - ret = afr_internal_lock_init (&local->internal_lock, priv->child_count); - if (ret < 0) - goto out; - - ret = -ENOMEM; - local->pre_op_compat = priv->pre_op_compat; - - local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op), - priv->child_count, - gf_afr_mt_char); - if (!local->transaction.pre_op) - goto out; - - local->transaction.changelog_xdata = - GF_CALLOC (sizeof (*local->transaction.changelog_xdata), - priv->child_count, gf_afr_mt_dict_t); - if (!local->transaction.changelog_xdata) - goto out; - - if (priv->arbiter_count == 1) { - local->transaction.pre_op_sources = - GF_CALLOC (sizeof (*local->transaction.pre_op_sources), - priv->child_count, gf_afr_mt_char); - if (!local->transaction.pre_op_sources) - goto out; - } - - local->transaction.failed_subvols = GF_CALLOC (sizeof (*local->transaction.failed_subvols), - priv->child_count, - gf_afr_mt_char); - if (!local->transaction.failed_subvols) - goto out; - - local->pending = afr_matrix_create (priv->child_count, - AFR_NUM_CHANGE_LOGS); - if (!local->pending) - goto out; - - ret = 0; - INIT_LIST_HEAD (&local->transaction.wait_list); - INIT_LIST_HEAD (&local->transaction.owner_list); +afr_transaction_local_init(afr_local_t *local, xlator_t *this) +{ + int ret = -ENOMEM; + afr_private_t *priv = NULL; + + priv = this->private; + ret = afr_internal_lock_init(&local->internal_lock, priv->child_count); + if (ret < 0) + goto out; + + ret = -ENOMEM; + local->pre_op_compat = priv->pre_op_compat; + + local->transaction.pre_op = GF_CALLOC(sizeof(*local->transaction.pre_op), + priv->child_count, gf_afr_mt_char); + if (!local->transaction.pre_op) + goto out; + + local->transaction.changelog_xdata = GF_CALLOC( + sizeof(*local->transaction.changelog_xdata), priv->child_count, + gf_afr_mt_dict_t); + if (!local->transaction.changelog_xdata) + goto out; + + if (priv->arbiter_count == 1) { + local->transaction.pre_op_sources = GF_CALLOC( + sizeof(*local->transaction.pre_op_sources), priv->child_count, + gf_afr_mt_char); + if (!local->transaction.pre_op_sources) + goto out; + } + + local->transaction.failed_subvols = GF_CALLOC( + sizeof(*local->transaction.failed_subvols), priv->child_count, + gf_afr_mt_char); + if (!local->transaction.failed_subvols) + goto out; + + local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS); + if (!local->pending) + goto out; + + ret = 0; + INIT_LIST_HEAD(&local->transaction.wait_list); + INIT_LIST_HEAD(&local->transaction.owner_list); out: - return ret; + return ret; } - void -afr_set_low_priority (call_frame_t *frame) +afr_set_low_priority(call_frame_t *frame) { - frame->root->pid = LOW_PRIO_PROC_PID; + frame->root->pid = LOW_PRIO_PROC_PID; } - void -afr_priv_destroy (afr_private_t *priv) -{ - int i = 0; - int child_count = -1; - - if (!priv) - goto out; - GF_FREE (priv->last_event); - - child_count = priv->child_count; - if (priv->thin_arbiter_count) { - child_count++; - } - if (priv->pending_key) { - for (i = 0; i < child_count; i++) - GF_FREE (priv->pending_key[i]); - } - - GF_FREE (priv->pending_reads); - GF_FREE (priv->local); - GF_FREE (priv->pending_key); - GF_FREE (priv->children); - GF_FREE (priv->child_up); - GF_FREE (priv->child_latency); - LOCK_DESTROY (&priv->lock); - - GF_FREE (priv); +afr_priv_destroy(afr_private_t *priv) +{ + int i = 0; + int child_count = -1; + + if 
(!priv) + goto out; + GF_FREE(priv->last_event); + + child_count = priv->child_count; + if (priv->thin_arbiter_count) { + child_count++; + } + if (priv->pending_key) { + for (i = 0; i < child_count; i++) + GF_FREE(priv->pending_key[i]); + } + + GF_FREE(priv->pending_reads); + GF_FREE(priv->local); + GF_FREE(priv->pending_key); + GF_FREE(priv->children); + GF_FREE(priv->child_up); + GF_FREE(priv->child_latency); + LOCK_DESTROY(&priv->lock); + + GF_FREE(priv); out: - return; + return; } -int** -afr_mark_pending_changelog (afr_private_t *priv, unsigned char *pending, - dict_t *xattr, ia_type_t iat) +int ** +afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending, + dict_t *xattr, ia_type_t iat) { - int i = 0; - int **changelog = NULL; - int idx = -1; - int m_idx = 0; - int d_idx = 0; - int ret = 0; + int i = 0; + int **changelog = NULL; + int idx = -1; + int m_idx = 0; + int d_idx = 0; + int ret = 0; - m_idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION); - d_idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); + m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION); + d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION); - idx = afr_index_from_ia_type (iat); + idx = afr_index_from_ia_type(iat); - changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS); - if (!changelog) - goto out; + changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS); + if (!changelog) + goto out; - for (i = 0; i < priv->child_count; i++) { - if (!pending[i]) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!pending[i]) + continue; - changelog[i][m_idx] = hton32(1); - if (idx != -1) - changelog[i][idx] = hton32(1); - /* If the newentry marking is on a newly created directory, - * then mark it with the full-heal indicator. - */ - if ((IA_ISDIR (iat)) && (priv->esh_granular)) - changelog[i][d_idx] = hton32(1); - } - ret = afr_set_pending_dict (priv, xattr, changelog); - if (ret < 0) { - afr_matrix_cleanup (changelog, priv->child_count); - return NULL; - } + changelog[i][m_idx] = hton32(1); + if (idx != -1) + changelog[i][idx] = hton32(1); + /* If the newentry marking is on a newly created directory, + * then mark it with the full-heal indicator. 
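/*
 * Illustrative sketch (editor's addition, not part of the patch): how a
 * single row of the pending matrix built here is filled in. Counters are
 * written in network byte order (the real code uses the hton32() wrapper);
 * the index constants and the function below are hypothetical stand-ins for
 * afr_index_for_transaction_type() / afr_index_from_ia_type().
 */
#include <stdint.h>
#include <arpa/inet.h> /* htonl() */

enum {
    TOY_IDX_DATA = 0,
    TOY_IDX_METADATA = 1,
    TOY_IDX_ENTRY = 2,
    TOY_NUM_CHANGE_LOGS = 3
};

/* Mark one child's row: metadata is always marked, the slot matching the
 * inode type is marked, and for directories under granular entry self-heal
 * the data slot doubles as the full-heal indicator. */
static void
toy_mark_pending_row(uint32_t row[TOY_NUM_CHANGE_LOGS], int type_idx,
                     int is_dir, int granular_esh)
{
    row[TOY_IDX_METADATA] = htonl(1);
    if (type_idx >= 0)
        row[type_idx] = htonl(1);
    if (is_dir && granular_esh)
        row[TOY_IDX_DATA] = htonl(1);
}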
+ */ + if ((IA_ISDIR(iat)) && (priv->esh_granular)) + changelog[i][d_idx] = hton32(1); + } + ret = afr_set_pending_dict(priv, xattr, changelog); + if (ret < 0) { + afr_matrix_cleanup(changelog, priv->child_count); + return NULL; + } out: - return changelog; + return changelog; } gf_boolean_t -afr_decide_heal_info (afr_private_t *priv, unsigned char *sources, int source) +afr_decide_heal_info(afr_private_t *priv, unsigned char *sources, int source) { - int sources_count = 0; + int sources_count = 0; - if (source < 0) - goto out; + if (source < 0) + goto out; - sources_count = AFR_COUNT (sources, priv->child_count); - if (sources_count == priv->child_count) - return _gf_false; + sources_count = AFR_COUNT(sources, priv->child_count); + if (sources_count == priv->child_count) + return _gf_false; out: - return _gf_true; + return _gf_true; } int -afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, - inode_t *inode, gf_boolean_t *msh, - gf_boolean_t *pending) -{ - int ret = -1; - unsigned char *locked_on = NULL; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *healed_sinks = NULL; - unsigned char *undid_pending = NULL; - struct afr_reply *locked_replies = NULL; - - afr_private_t *priv = this->private; - - locked_on = alloca0 (priv->child_count); - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - undid_pending = alloca0 (priv->child_count); - - locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); - - ret = afr_selfheal_inodelk (frame, this, inode, this->name, - LLONG_MAX - 1, 0, locked_on); - { - if (ret == 0) { - /* Not a single lock */ - ret = -afr_final_errno (frame->local, priv); - if (ret == 0) - ret = -ENOTCONN;/* all invalid responses */ - goto out; - } - ret = __afr_selfheal_metadata_prepare (frame, this, inode, - locked_on, sources, - sinks, healed_sinks, - undid_pending, - locked_replies, - pending); - *msh = afr_decide_heal_info (priv, sources, ret); - } - afr_selfheal_uninodelk (frame, this, inode, this->name, - LLONG_MAX - 1, 0, locked_on); +afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this, + inode_t *inode, gf_boolean_t *msh, + gf_boolean_t *pending) +{ + int ret = -1; + unsigned char *locked_on = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + + afr_private_t *priv = this->private; + + locked_on = alloca0(priv->child_count); + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + undid_pending = alloca0(priv->child_count); + + locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); + + ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, + locked_on); + { + if (ret == 0) { + /* Not a single lock */ + ret = -afr_final_errno(frame->local, priv); + if (ret == 0) + ret = -ENOTCONN; /* all invalid responses */ + goto out; + } + ret = __afr_selfheal_metadata_prepare( + frame, this, inode, locked_on, sources, sinks, healed_sinks, + undid_pending, locked_replies, pending); + *msh = afr_decide_heal_info(priv, sources, ret); + } + afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, + locked_on); out: - if (locked_replies) - afr_replies_wipe (locked_replies, priv->child_count); - return ret; + if (locked_replies) + afr_replies_wipe(locked_replies, 
priv->child_count); + return ret; } int -afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_boolean_t *dsh, - gf_boolean_t *pflag) -{ - int ret = -1; - unsigned char *data_lock = NULL; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *healed_sinks = NULL; - unsigned char *undid_pending = NULL; - afr_private_t *priv = NULL; - struct afr_reply *locked_replies = NULL; - inode_t *inode = fd->inode; - - priv = this->private; - data_lock = alloca0 (priv->child_count); - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - undid_pending = alloca0 (priv->child_count); - - locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); - - ret = afr_selfheal_inodelk (frame, this, inode, this->name, - 0, 0, data_lock); - { - if (ret == 0) { - ret = -afr_final_errno (frame->local, priv); - if (ret == 0) - ret = -ENOTCONN; /* all invalid responses */ - goto out; - } - ret = __afr_selfheal_data_prepare (frame, this, inode, - data_lock, sources, sinks, - healed_sinks, undid_pending, - locked_replies, pflag); - *dsh = afr_decide_heal_info (priv, sources, ret); - } - afr_selfheal_uninodelk (frame, this, inode, this->name, 0, 0, - data_lock); +afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_boolean_t *dsh, gf_boolean_t *pflag) +{ + int ret = -1; + unsigned char *data_lock = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + afr_private_t *priv = NULL; + struct afr_reply *locked_replies = NULL; + inode_t *inode = fd->inode; + + priv = this->private; + data_lock = alloca0(priv->child_count); + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + undid_pending = alloca0(priv->child_count); + + locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); + + ret = afr_selfheal_inodelk(frame, this, inode, this->name, 0, 0, data_lock); + { + if (ret == 0) { + ret = -afr_final_errno(frame->local, priv); + if (ret == 0) + ret = -ENOTCONN; /* all invalid responses */ + goto out; + } + ret = __afr_selfheal_data_prepare(frame, this, inode, data_lock, + sources, sinks, healed_sinks, + undid_pending, locked_replies, pflag); + *dsh = afr_decide_heal_info(priv, sources, ret); + } + afr_selfheal_uninodelk(frame, this, inode, this->name, 0, 0, data_lock); out: - if (locked_replies) - afr_replies_wipe (locked_replies, priv->child_count); - return ret; + if (locked_replies) + afr_replies_wipe(locked_replies, priv->child_count); + return ret; } int -afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this, - inode_t *inode, - gf_boolean_t *esh, gf_boolean_t *pflag) -{ - int ret = -1; - int source = -1; - afr_private_t *priv = NULL; - unsigned char *locked_on = NULL; - unsigned char *data_lock = NULL; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *healed_sinks = NULL; - struct afr_reply *locked_replies = NULL; - gf_boolean_t granular_locks = _gf_false; - - priv = this->private; - if (strcmp ("granular", priv->locking_scheme) == 0) - granular_locks = _gf_true; - locked_on = alloca0 (priv->child_count); - data_lock = alloca0 (priv->child_count); - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - - locked_replies = alloca0 (sizeof (*locked_replies) * 
priv->child_count); - - if (!granular_locks) { - ret = afr_selfheal_tryentrylk (frame, this, inode, - priv->sh_domain, NULL, locked_on); - } +afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this, + inode_t *inode, gf_boolean_t *esh, + gf_boolean_t *pflag) +{ + int ret = -1; + int source = -1; + afr_private_t *priv = NULL; + unsigned char *locked_on = NULL; + unsigned char *data_lock = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + struct afr_reply *locked_replies = NULL; + gf_boolean_t granular_locks = _gf_false; + + priv = this->private; + if (strcmp("granular", priv->locking_scheme) == 0) + granular_locks = _gf_true; + locked_on = alloca0(priv->child_count); + data_lock = alloca0(priv->child_count); + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + + locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); + + if (!granular_locks) { + ret = afr_selfheal_tryentrylk(frame, this, inode, priv->sh_domain, NULL, + locked_on); + } + { + if (!granular_locks && ret == 0) { + ret = -afr_final_errno(frame->local, priv); + if (ret == 0) + ret = -ENOTCONN; /* all invalid responses */ + goto out; + } + + ret = afr_selfheal_entrylk(frame, this, inode, this->name, NULL, + data_lock); { - if (!granular_locks && ret == 0) { - ret = -afr_final_errno (frame->local, priv); - if (ret == 0) - ret = -ENOTCONN;/* all invalid responses */ - goto out; - } - - ret = afr_selfheal_entrylk (frame, this, inode, this->name, - NULL, data_lock); - { - if (ret == 0) { - ret = -afr_final_errno (frame->local, priv); - if (ret == 0) - ret = -ENOTCONN; - /* all invalid responses */ - goto unlock; - } - ret = __afr_selfheal_entry_prepare (frame, this, inode, - data_lock, sources, - sinks, healed_sinks, - locked_replies, - &source, pflag); - if ((ret == 0) && source < 0) - ret = -EIO; - *esh = afr_decide_heal_info (priv, sources, ret); - } - afr_selfheal_unentrylk (frame, this, inode, this->name, NULL, - data_lock, NULL); - } + if (ret == 0) { + ret = -afr_final_errno(frame->local, priv); + if (ret == 0) + ret = -ENOTCONN; + /* all invalid responses */ + goto unlock; + } + ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock, + sources, sinks, healed_sinks, + locked_replies, &source, pflag); + if ((ret == 0) && source < 0) + ret = -EIO; + *esh = afr_decide_heal_info(priv, sources, ret); + } + afr_selfheal_unentrylk(frame, this, inode, this->name, NULL, data_lock, + NULL); + } unlock: - if (!granular_locks) - afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, - NULL, locked_on, NULL); + if (!granular_locks) + afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL, + locked_on, NULL); out: - if (locked_replies) - afr_replies_wipe (locked_replies, priv->child_count); - return ret; + if (locked_replies) + afr_replies_wipe(locked_replies, priv->child_count); + return ret; } int -afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid, - inode_t **inode, - gf_boolean_t *entry_selfheal, - gf_boolean_t *data_selfheal, - gf_boolean_t *metadata_selfheal, - gf_boolean_t *pending) - -{ - int ret = -1; - fd_t *fd = NULL; - gf_boolean_t dsh = _gf_false; - gf_boolean_t msh = _gf_false; - gf_boolean_t esh = _gf_false; - - ret = afr_selfheal_unlocked_inspect (frame, this, gfid, inode, - &dsh, &msh, &esh); - if (ret) - goto out; - - /* For every heal type hold locks and check if it indeed needs heal */ - - - /* Heal-info does 
an open() on the file being examined so that the - * current eager-lock holding client, if present, at some point sees - * open-fd count being > 1 and releases the eager-lock so that heal-info - * doesn't remain blocked forever until IO completes. - */ - if ((*inode)->ia_type == IA_IFREG) { - ret = afr_selfheal_data_open (this, *inode, &fd); - if (ret < 0) { - gf_msg_debug (this->name, -ret, "%s: Failed to open", - uuid_utoa ((*inode)->gfid)); - goto out; - } +afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + inode_t **inode, gf_boolean_t *entry_selfheal, + gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, + gf_boolean_t *pending) + +{ + int ret = -1; + fd_t *fd = NULL; + gf_boolean_t dsh = _gf_false; + gf_boolean_t msh = _gf_false; + gf_boolean_t esh = _gf_false; + + ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh, + &esh); + if (ret) + goto out; + + /* For every heal type hold locks and check if it indeed needs heal */ + + /* Heal-info does an open() on the file being examined so that the + * current eager-lock holding client, if present, at some point sees + * open-fd count being > 1 and releases the eager-lock so that heal-info + * doesn't remain blocked forever until IO completes. + */ + if ((*inode)->ia_type == IA_IFREG) { + ret = afr_selfheal_data_open(this, *inode, &fd); + if (ret < 0) { + gf_msg_debug(this->name, -ret, "%s: Failed to open", + uuid_utoa((*inode)->gfid)); + goto out; } + } - if (msh) { - ret = afr_selfheal_locked_metadata_inspect (frame, this, - *inode, &msh, - pending); - if (ret == -EIO) - goto out; - } + if (msh) { + ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh, + pending); + if (ret == -EIO) + goto out; + } - if (dsh) { - ret = afr_selfheal_locked_data_inspect (frame, this, fd, - &dsh, pending); - if (ret == -EIO || (ret == -EAGAIN)) - goto out; - } + if (dsh) { + ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending); + if (ret == -EIO || (ret == -EAGAIN)) + goto out; + } - if (esh) { - ret = afr_selfheal_locked_entry_inspect (frame, this, *inode, - &esh, pending); - } + if (esh) { + ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh, + pending); + } out: - *data_selfheal = dsh; - *entry_selfheal = esh; - *metadata_selfheal = msh; - if (fd) - fd_unref (fd); - return ret; + *data_selfheal = dsh; + *entry_selfheal = esh; + *metadata_selfheal = msh; + if (fd) + fd_unref(fd); + return ret; } -dict_t* -afr_set_heal_info (char *status) +dict_t * +afr_set_heal_info(char *status) { - dict_t *dict = NULL; - int ret = -1; - - dict = dict_new (); - if (!dict) { - ret = -ENOMEM; - goto out; - } + dict_t *dict = NULL; + int ret = -1; - ret = dict_set_str (dict, "heal-info", status); - if (ret) - gf_msg ("", GF_LOG_WARNING, -ret, - AFR_MSG_DICT_SET_FAILED, - "Failed to set heal-info key to " - "%s", status); + dict = dict_new(); + if (!dict) { + ret = -ENOMEM; + goto out; + } + + ret = dict_set_str(dict, "heal-info", status); + if (ret) + gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, + "Failed to set heal-info key to " + "%s", + status); out: - return dict; + return dict; } int -afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - gf_boolean_t data_selfheal = _gf_false; - gf_boolean_t metadata_selfheal = _gf_false; - gf_boolean_t entry_selfheal = _gf_false; - gf_boolean_t pending = _gf_false; - dict_t *dict = NULL; - int ret = -1; - int op_errno = 0; - inode_t *inode = NULL; - char *substr = NULL; - char *status = NULL; - 
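/*
 * Illustrative sketch (editor's addition, not part of the patch): the
 * status-string decision that afr_get_heal_info() makes below, pulled out as
 * a tiny pure function for readability. Names are hypothetical; the real
 * code also appends a "-pending" suffix when pending marks were seen and
 * builds the string with gf_asprintf().
 */
#include <errno.h>
#include <stddef.h>

static const char *
toy_heal_info_status(int inspect_ret, int data_sh, int entry_sh,
                     int metadata_sh)
{
    if (inspect_ret == -EIO)
        return "split-brain";
    if (inspect_ret == -EAGAIN)
        return "possibly-healing";
    if (inspect_ret >= 0)
        /* A source was identified; heal only if some type still needs it. */
        return (data_sh || entry_sh || metadata_sh) ? "heal" : "no-heal";
    /* Other errors (e.g. -ENOTCONN): report heal only when one of the
     * self-heal flags was set during the unlocked inspect. */
    return (data_sh || entry_sh || metadata_sh) ? "heal" : NULL;
}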
- ret = afr_selfheal_locked_inspect (frame, this, loc->gfid, &inode, - &entry_selfheal, - &data_selfheal, &metadata_selfheal, - &pending); - - if (ret == -ENOMEM) { - op_errno = -ret; - ret = -1; +afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + gf_boolean_t data_selfheal = _gf_false; + gf_boolean_t metadata_selfheal = _gf_false; + gf_boolean_t entry_selfheal = _gf_false; + gf_boolean_t pending = _gf_false; + dict_t *dict = NULL; + int ret = -1; + int op_errno = 0; + inode_t *inode = NULL; + char *substr = NULL; + char *status = NULL; + + ret = afr_selfheal_locked_inspect(frame, this, loc->gfid, &inode, + &entry_selfheal, &data_selfheal, + &metadata_selfheal, &pending); + + if (ret == -ENOMEM) { + op_errno = -ret; + ret = -1; + goto out; + } + + if (pending) { + gf_asprintf(&substr, "-pending"); + if (!substr) + goto out; + } + + if (ret == -EIO) { + ret = gf_asprintf(&status, "split-brain%s", substr ? substr : ""); + if (ret < 0) + goto out; + dict = afr_set_heal_info(status); + } else if (ret == -EAGAIN) { + ret = gf_asprintf(&status, "possibly-healing%s", substr ? substr : ""); + if (ret < 0) + goto out; + dict = afr_set_heal_info(status); + } else if (ret >= 0) { + /* value of ret = source index + * so ret >= 0 and at least one of the 3 booleans set to + * true means a source is identified; heal is required. + */ + if (!data_selfheal && !entry_selfheal && !metadata_selfheal) { + dict = afr_set_heal_info("no-heal"); + } else { + ret = gf_asprintf(&status, "heal%s", substr ? substr : ""); + if (ret < 0) goto out; + dict = afr_set_heal_info(status); + } + } else if (ret < 0) { + /* Apart from above checked -ve ret values, there are + * other possible ret values like ENOTCONN + * (returned when number of valid replies received are + * less than 2) + * in which case heal is required when one of the + * selfheal booleans is set. + */ + if (data_selfheal || entry_selfheal || metadata_selfheal) { + ret = gf_asprintf(&status, "heal%s", substr ? substr : ""); + if (ret < 0) + goto out; + dict = afr_set_heal_info(status); } - - if (pending) { - gf_asprintf (&substr, "-pending"); - if (!substr) - goto out; - } - - if (ret == -EIO) { - ret = gf_asprintf (&status, "split-brain%s", - substr? substr : ""); - if (ret < 0) - goto out; - dict = afr_set_heal_info (status); - } else if (ret == -EAGAIN) { - ret = gf_asprintf (&status, "possibly-healing%s", - substr? substr : ""); - if (ret < 0) - goto out; - dict = afr_set_heal_info (status); - } else if (ret >= 0) { - /* value of ret = source index - * so ret >= 0 and at least one of the 3 booleans set to - * true means a source is identified; heal is required. - */ - if (!data_selfheal && !entry_selfheal && - !metadata_selfheal) { - dict = afr_set_heal_info ("no-heal"); - } else { - ret = gf_asprintf (&status, "heal%s", - substr? substr : ""); - if (ret < 0) - goto out; - dict = afr_set_heal_info (status); - } - } else if (ret < 0) { - /* Apart from above checked -ve ret values, there are - * other possible ret values like ENOTCONN - * (returned when number of valid replies received are - * less than 2) - * in which case heal is required when one of the - * selfheal booleans is set. - */ - if (data_selfheal || entry_selfheal || - metadata_selfheal) { - ret = gf_asprintf (&status, "heal%s", - substr? 
substr : ""); - if (ret < 0) - goto out; - dict = afr_set_heal_info (status); - } - } - ret = 0; + } + ret = 0; out: - AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL); - if (dict) - dict_unref (dict); - if (inode) - inode_unref (inode); - GF_FREE (substr); - return ret; + AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL); + if (dict) + dict_unref(dict); + if (inode) + inode_unref(inode); + GF_FREE(substr); + return ret; } int -_afr_is_split_brain (call_frame_t *frame, xlator_t *this, - struct afr_reply *replies, - afr_transaction_type type, - gf_boolean_t *spb) -{ - afr_private_t *priv = NULL; - uint64_t *witness = NULL; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - int sources_count = 0; - int ret = 0; - - priv = this->private; - - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - witness = alloca0(priv->child_count * sizeof (*witness)); - - ret = afr_selfheal_find_direction (frame, this, replies, - type, priv->child_up, sources, - sinks, witness, NULL); - if (ret) - return ret; +_afr_is_split_brain(call_frame_t *frame, xlator_t *this, + struct afr_reply *replies, afr_transaction_type type, + gf_boolean_t *spb) +{ + afr_private_t *priv = NULL; + uint64_t *witness = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + int sources_count = 0; + int ret = 0; + + priv = this->private; + + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + witness = alloca0(priv->child_count * sizeof(*witness)); + + ret = afr_selfheal_find_direction(frame, this, replies, type, + priv->child_up, sources, sinks, witness, + NULL); + if (ret) + return ret; - sources_count = AFR_COUNT (sources, priv->child_count); - if (!sources_count) - *spb = _gf_true; + sources_count = AFR_COUNT(sources, priv->child_count); + if (!sources_count) + *spb = _gf_true; - return ret; + return ret; } int -afr_is_split_brain (call_frame_t *frame, xlator_t *this, inode_t *inode, - uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb) +afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode, + uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb) { - int ret = -1; - afr_private_t *priv = NULL; - struct afr_reply *replies = NULL; + int ret = -1; + afr_private_t *priv = NULL; + struct afr_reply *replies = NULL; - priv = this->private; + priv = this->private; - replies = alloca0 (sizeof (*replies) * priv->child_count); + replies = alloca0(sizeof(*replies) * priv->child_count); - ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies); - if (ret) - goto out; + ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies); + if (ret) + goto out; - if (!afr_can_decide_split_brain_source_sinks (replies, - priv->child_count)) { - ret = -EAGAIN; - goto out; - } + if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) { + ret = -EAGAIN; + goto out; + } - ret = _afr_is_split_brain (frame, this, replies, - AFR_DATA_TRANSACTION, d_spb); - if (ret) - goto out; + ret = _afr_is_split_brain(frame, this, replies, AFR_DATA_TRANSACTION, + d_spb); + if (ret) + goto out; - ret = _afr_is_split_brain (frame, this, replies, - AFR_METADATA_TRANSACTION, m_spb); + ret = _afr_is_split_brain(frame, this, replies, AFR_METADATA_TRANSACTION, + m_spb); out: - if (replies) { - afr_replies_wipe (replies, priv->child_count); - replies = NULL; - } - return ret; + if (replies) { + afr_replies_wipe(replies, priv->child_count); + replies = NULL; + } + return ret; } int -afr_get_split_brain_status_cbk (int ret, 
call_frame_t *frame, void *opaque) +afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque) { - GF_FREE (opaque); - return 0; + GF_FREE(opaque); + return 0; } int -afr_get_split_brain_status (void *opaque) -{ - gf_boolean_t d_spb = _gf_false; - gf_boolean_t m_spb = _gf_false; - int ret = -1; - int op_errno = 0; - int i = 0; - char *choices = NULL; - char *status = NULL; - dict_t *dict = NULL; - inode_t *inode = NULL; - afr_private_t *priv = NULL; - xlator_t **children = NULL; - call_frame_t *frame = NULL; - xlator_t *this = NULL; - loc_t *loc = NULL; - afr_spb_status_t *data = NULL; - - data = opaque; - frame = data->frame; - this = frame->this; - loc = data->loc; - priv = this->private; - children = priv->children; - - inode = afr_inode_find (this, loc->gfid); - if (!inode) - goto out; - - dict = dict_new (); - if (!dict) { - op_errno = ENOMEM; - ret = -1; - goto out; +afr_get_split_brain_status(void *opaque) +{ + gf_boolean_t d_spb = _gf_false; + gf_boolean_t m_spb = _gf_false; + int ret = -1; + int op_errno = 0; + int i = 0; + char *choices = NULL; + char *status = NULL; + dict_t *dict = NULL; + inode_t *inode = NULL; + afr_private_t *priv = NULL; + xlator_t **children = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + afr_spb_status_t *data = NULL; + + data = opaque; + frame = data->frame; + this = frame->this; + loc = data->loc; + priv = this->private; + children = priv->children; + + inode = afr_inode_find(this, loc->gfid); + if (!inode) + goto out; + + dict = dict_new(); + if (!dict) { + op_errno = ENOMEM; + ret = -1; + goto out; + } + + /* Calculation for string length : + * (child_count X length of child-name) + SLEN (" Choices :") + * child-name consists of : + * a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX + * b) strlen ("-client-00,") assuming 16 replicas + */ + choices = alloca0(priv->child_count * (256 + SLEN("-client-00,")) + + SLEN(" Choices:")); + + ret = afr_is_split_brain(frame, this, inode, loc->gfid, &d_spb, &m_spb); + if (ret) { + op_errno = -ret; + if (ret == -EAGAIN) { + ret = dict_set_str(dict, GF_AFR_SBRAIN_STATUS, + SBRAIN_HEAL_NO_GO_MSG); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + AFR_MSG_DICT_SET_FAILED, + "Failed to set GF_AFR_SBRAIN_STATUS in dict"); + } } + ret = -1; + goto out; + } - /* Calculation for string length : - * (child_count X length of child-name) + SLEN (" Choices :") - * child-name consists of : - * a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX - * b) strlen ("-client-00,") assuming 16 replicas - */ - choices = alloca0 (priv->child_count * (256 + SLEN ("-client-00,")) + - SLEN (" Choices:")); - - ret = afr_is_split_brain (frame, this, inode, loc->gfid, &d_spb, - &m_spb); - if (ret) { - op_errno = -ret; - if (ret == -EAGAIN) { - ret = dict_set_str (dict, GF_AFR_SBRAIN_STATUS, - SBRAIN_HEAL_NO_GO_MSG); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, - -ret, AFR_MSG_DICT_SET_FAILED, - "Failed to set GF_AFR_SBRAIN_STATUS in dict"); - } - } - ret = -1; - goto out; + if (d_spb || m_spb) { + sprintf(choices, " Choices:"); + for (i = 0; i < priv->child_count; i++) { + strcat(choices, children[i]->name); + strcat(choices, ","); } + choices[strlen(choices) - 1] = '\0'; - if (d_spb || m_spb) { - sprintf (choices, " Choices:"); - for (i = 0; i < priv->child_count; i++) { - strcat (choices, children[i]->name); - strcat (choices, ","); - } - choices[strlen (choices) - 1] = '\0'; + ret = gf_asprintf(&status, + "data-split-brain:%s " + 
"metadata-split-brain:%s%s", + (d_spb) ? "yes" : "no", (m_spb) ? "yes" : "no", + choices); - ret = gf_asprintf (&status, "data-split-brain:%s " - "metadata-split-brain:%s%s", - (d_spb) ? "yes" : "no", - (m_spb) ? "yes" : "no", choices); - - if (-1 == ret) { - op_errno = ENOMEM; - goto out; - } - ret = dict_set_dynstr (dict, GF_AFR_SBRAIN_STATUS, status); - if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } - } else { - ret = dict_set_str (dict, GF_AFR_SBRAIN_STATUS, - "The file is not under data or" - " metadata split-brain"); - if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } + if (-1 == ret) { + op_errno = ENOMEM; + goto out; + } + ret = dict_set_dynstr(dict, GF_AFR_SBRAIN_STATUS, status); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + } else { + ret = dict_set_str(dict, GF_AFR_SBRAIN_STATUS, + "The file is not under data or" + " metadata split-brain"); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; } + } - ret = 0; + ret = 0; out: - AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL); - if (dict) - dict_unref (dict); - if (inode) - inode_unref (inode); - return ret; + AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL); + if (dict) + dict_unref(dict); + if (inode) + inode_unref(inode); + return ret; } int32_t afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc) { - int ret = 0; - int op_errno = 0; - dict_t *dict = NULL; - afr_local_t *local = NULL; + int ret = 0; + int op_errno = 0; + dict_t *dict = NULL; + afr_local_t *local = NULL; - local = frame->local; - dict = dict_new (); - if (!dict) { - op_errno = ENOMEM; - ret = -1; - goto out; - } + local = frame->local; + dict = dict_new(); + if (!dict) { + op_errno = ENOMEM; + ret = -1; + goto out; + } - ret = afr_selfheal_do (frame, this, loc->gfid); + ret = afr_selfheal_do(frame, this, loc->gfid); - if (ret == 1 || ret == 2) { - ret = dict_set_str (dict, "sh-fail-msg", - "File not in split-brain"); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, - -ret, AFR_MSG_DICT_SET_FAILED, - "Failed to set sh-fail-msg in dict"); - ret = 0; - goto out; - } else { - if (local->xdata_rsp) { - /* 'sh-fail-msg' has been set in the dict during self-heal.*/ - dict_copy (local->xdata_rsp, dict); - ret = 0; - } else if (ret < 0) { - op_errno = -ret; - ret = -1; - } + if (ret == 1 || ret == 2) { + ret = dict_set_str(dict, "sh-fail-msg", "File not in split-brain"); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, + "Failed to set sh-fail-msg in dict"); + ret = 0; + goto out; + } else { + if (local->xdata_rsp) { + /* 'sh-fail-msg' has been set in the dict during self-heal.*/ + dict_copy(local->xdata_rsp, dict); + ret = 0; + } else if (ret < 0) { + op_errno = -ret; + ret = -1; } + } out: - if (local->op == GF_FOP_GETXATTR) - AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL); - else if (local->op == GF_FOP_SETXATTR) - AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL); - if (dict) - dict_unref(dict); - return ret; + if (local->op == GF_FOP_GETXATTR) + AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL); + else if (local->op == GF_FOP_SETXATTR) + AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL); + if (dict) + dict_unref(dict); + return ret; } int -afr_get_child_index_from_name (xlator_t *this, char *name) +afr_get_child_index_from_name(xlator_t *this, char *name) { - afr_private_t *priv = this->private; - int index = -1; + afr_private_t *priv = this->private; + int index = -1; - for (index = 0; index < priv->child_count; 
index++) { - if (!strcmp (priv->children[index]->name, name)) - goto out; - } - index = -1; + for (index = 0; index < priv->child_count; index++) { + if (!strcmp(priv->children[index]->name, name)) + goto out; + } + index = -1; out: - return index; + return index; } void -afr_priv_need_heal_set (afr_private_t *priv, gf_boolean_t need_heal) +afr_priv_need_heal_set(afr_private_t *priv, gf_boolean_t need_heal) { - LOCK (&priv->lock); - { - priv->need_heal = need_heal; - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + priv->need_heal = need_heal; + } + UNLOCK(&priv->lock); } void -afr_set_need_heal (xlator_t *this, afr_local_t *local) +afr_set_need_heal(xlator_t *this, afr_local_t *local) { - int i = 0; - afr_private_t *priv = this->private; - gf_boolean_t need_heal = _gf_false; + int i = 0; + afr_private_t *priv = this->private; + gf_boolean_t need_heal = _gf_false; - for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].valid && local->replies[i].need_heal) { - need_heal = _gf_true; - break; - } + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].valid && local->replies[i].need_heal) { + need_heal = _gf_true; + break; } - afr_priv_need_heal_set (priv, need_heal); - return; + } + afr_priv_need_heal_set(priv, need_heal); + return; } gf_boolean_t -afr_get_need_heal (xlator_t *this) +afr_get_need_heal(xlator_t *this) { - afr_private_t *priv = this->private; - gf_boolean_t need_heal = _gf_true; + afr_private_t *priv = this->private; + gf_boolean_t need_heal = _gf_true; - LOCK (&priv->lock); - { - need_heal = priv->need_heal; - } - UNLOCK (&priv->lock); - return need_heal; + LOCK(&priv->lock); + { + need_heal = priv->need_heal; + } + UNLOCK(&priv->lock); + return need_heal; } int -afr_get_msg_id (char *op_type) +afr_get_msg_id(char *op_type) { - - if (!strcmp (op_type, GF_AFR_REPLACE_BRICK)) - return AFR_MSG_REPLACE_BRICK_STATUS; - else if (!strcmp (op_type, GF_AFR_ADD_BRICK)) - return AFR_MSG_ADD_BRICK_STATUS; - return -1; + if (!strcmp(op_type, GF_AFR_REPLACE_BRICK)) + return AFR_MSG_REPLACE_BRICK_STATUS; + else if (!strcmp(op_type, GF_AFR_ADD_BRICK)) + return AFR_MSG_ADD_BRICK_STATUS; + return -1; } int -afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame, - void *opaque) +afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *heal_frame, + void *opaque) { + call_frame_t *txn_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *heal_local = NULL; + xlator_t *this = NULL; - call_frame_t *txn_frame = NULL; - afr_local_t *local = NULL; - afr_local_t *heal_local = NULL; - xlator_t *this = NULL; - - heal_local = heal_frame->local; - txn_frame = heal_local->heal_frame; - local = txn_frame->local; - this = txn_frame->this; + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + local = txn_frame->local; + this = txn_frame->this; - /* Refresh the inode agan and proceed with the transaction.*/ - afr_inode_refresh (txn_frame, this, local->inode, NULL, - local->refreshfn); + /* Refresh the inode agan and proceed with the transaction.*/ + afr_inode_refresh(txn_frame, this, local->inode, NULL, local->refreshfn); - AFR_STACK_DESTROY (heal_frame); + AFR_STACK_DESTROY(heal_frame); - return 0; + return 0; } int -afr_fav_child_reset_sink_xattrs (void *opaque) -{ - call_frame_t *heal_frame = NULL; - call_frame_t *txn_frame = NULL; - xlator_t *this = NULL; - gf_boolean_t d_spb = _gf_false; - gf_boolean_t m_spb = _gf_false; - afr_local_t *heal_local = NULL; - afr_local_t *txn_local = NULL; - afr_private_t *priv = NULL; - inode_t *inode = 
NULL; - unsigned char *locked_on = NULL; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *healed_sinks = NULL; - unsigned char *undid_pending = NULL; - struct afr_reply *locked_replies = NULL; - int ret = 0; - - heal_frame = (call_frame_t *) opaque; - heal_local = heal_frame->local; - txn_frame = heal_local->heal_frame; - txn_local = txn_frame->local; - this = txn_frame->this; - inode = txn_local->inode; - priv = this->private; - locked_on = alloca0 (priv->child_count); - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - undid_pending = alloca0 (priv->child_count); - locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); - - ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, - AFR_DATA_TRANSACTION, &d_spb); - - ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, - AFR_METADATA_TRANSACTION, &m_spb); - - /* Take appropriate locks and reset sink xattrs. */ - if (d_spb) { - ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, - 0, 0, locked_on); - { - if (ret < AFR_SH_MIN_PARTICIPANTS) - goto data_unlock; - ret = __afr_selfheal_data_prepare (heal_frame, this, - inode, locked_on, - sources, sinks, - healed_sinks, - undid_pending, - locked_replies, - NULL); - } -data_unlock: - afr_selfheal_uninodelk (heal_frame, this, inode, this->name, - 0, 0, locked_on); - } - - if (m_spb) { - memset (locked_on, 0, sizeof (*locked_on) * priv->child_count); - memset (undid_pending, 0, - sizeof (*undid_pending) * priv->child_count); - ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, - LLONG_MAX-1, 0, locked_on); - { - if (ret < AFR_SH_MIN_PARTICIPANTS) - goto mdata_unlock; - ret = __afr_selfheal_metadata_prepare (heal_frame, this, - inode, locked_on, - sources, sinks, - healed_sinks, - undid_pending, - locked_replies, - NULL); - - } -mdata_unlock: - afr_selfheal_uninodelk (heal_frame, this, inode, this->name, - LLONG_MAX-1, 0, locked_on); +afr_fav_child_reset_sink_xattrs(void *opaque) +{ + call_frame_t *heal_frame = NULL; + call_frame_t *txn_frame = NULL; + xlator_t *this = NULL; + gf_boolean_t d_spb = _gf_false; + gf_boolean_t m_spb = _gf_false; + afr_local_t *heal_local = NULL; + afr_local_t *txn_local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + unsigned char *locked_on = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + int ret = 0; + + heal_frame = (call_frame_t *)opaque; + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + txn_local = txn_frame->local; + this = txn_frame->this; + inode = txn_local->inode; + priv = this->private; + locked_on = alloca0(priv->child_count); + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + undid_pending = alloca0(priv->child_count); + locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); + + ret = _afr_is_split_brain(txn_frame, this, txn_local->replies, + AFR_DATA_TRANSACTION, &d_spb); + + ret = _afr_is_split_brain(txn_frame, this, txn_local->replies, + AFR_METADATA_TRANSACTION, &m_spb); + + /* Take appropriate locks and reset sink xattrs. 
*/ + if (d_spb) { + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0, + locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto data_unlock; + ret = __afr_selfheal_data_prepare( + heal_frame, this, inode, locked_on, sources, sinks, + healed_sinks, undid_pending, locked_replies, NULL); + } + data_unlock: + afr_selfheal_uninodelk(heal_frame, this, inode, this->name, 0, 0, + locked_on); + } + + if (m_spb) { + memset(locked_on, 0, sizeof(*locked_on) * priv->child_count); + memset(undid_pending, 0, sizeof(*undid_pending) * priv->child_count); + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, + LLONG_MAX - 1, 0, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare( + heal_frame, this, inode, locked_on, sources, sinks, + healed_sinks, undid_pending, locked_replies, NULL); } + mdata_unlock: + afr_selfheal_uninodelk(heal_frame, this, inode, this->name, + LLONG_MAX - 1, 0, locked_on); + } - return ret; - + return ret; } /* * Concatenates the xattrs in local->replies separated by a delimiter. */ int -afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this, - char *buf, const char *default_str, - int32_t *serz_len, char delimiter) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - char *xattr = NULL; - int i = 0; - int len = 0; - size_t str_len = 0; - int ret = -1; - - priv = this->private; - local = frame->local; - - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid || local->replies[i].op_ret) { - str_len = strlen (default_str); - buf = strncat (buf, default_str, str_len); - len += str_len; - buf[len++] = delimiter; - buf[len] = '\0'; - } else { - ret = dict_get_str (local->replies[i].xattr, - local->cont.getxattr.name, &xattr); - if (ret) { - gf_msg ("TEST", GF_LOG_ERROR, -ret, - AFR_MSG_DICT_GET_FAILED, - "Failed to get the node_uuid of brick " - "%d", i); - goto out; - } - str_len = strlen (xattr); - buf = strncat (buf, xattr, str_len); - len += str_len; - buf[len++] = delimiter; - buf[len] = '\0'; - } - } - buf[--len] = '\0'; /*remove the last delimiter*/ - if (serz_len) - *serz_len = ++len; - ret = 0; +afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this, + char *buf, const char *default_str, + int32_t *serz_len, char delimiter) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + char *xattr = NULL; + int i = 0; + int len = 0; + size_t str_len = 0; + int ret = -1; + + priv = this->private; + local = frame->local; + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid || local->replies[i].op_ret) { + str_len = strlen(default_str); + buf = strncat(buf, default_str, str_len); + len += str_len; + buf[len++] = delimiter; + buf[len] = '\0'; + } else { + ret = dict_get_str(local->replies[i].xattr, + local->cont.getxattr.name, &xattr); + if (ret) { + gf_msg("TEST", GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED, + "Failed to get the node_uuid of brick " + "%d", + i); + goto out; + } + str_len = strlen(xattr); + buf = strncat(buf, xattr, str_len); + len += str_len; + buf[len++] = delimiter; + buf[len] = '\0'; + } + } + buf[--len] = '\0'; /*remove the last delimiter*/ + if (serz_len) + *serz_len = ++len; + ret = 0; out: - return ret; + return ret; } uint64_t -afr_write_subvol_get (call_frame_t *frame, xlator_t *this) +afr_write_subvol_get(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - uint64_t write_subvol = 0; + afr_local_t *local = NULL; + uint64_t write_subvol = 0; - local = 
frame->local; - LOCK(&local->inode->lock); - write_subvol = local->inode_ctx->write_subvol; - UNLOCK (&local->inode->lock); + local = frame->local; + LOCK(&local->inode->lock); + write_subvol = local->inode_ctx->write_subvol; + UNLOCK(&local->inode->lock); - return write_subvol; + return write_subvol; } int -afr_write_subvol_set (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - unsigned char *data_accused = NULL; - unsigned char *metadata_accused = NULL; - unsigned char *data_readable = NULL; - unsigned char *metadata_readable = NULL; - uint16_t datamap = 0; - uint16_t metadatamap = 0; - uint64_t val = 0; - int event = 0; - int i = 0; - - local = frame->local; - priv = this->private; - data_accused = alloca0 (priv->child_count); - metadata_accused = alloca0 (priv->child_count); - data_readable = alloca0 (priv->child_count); - metadata_readable = alloca0 (priv->child_count); - event = local->event_generation; - - afr_readables_fill (frame, this, local->inode, data_accused, - metadata_accused, data_readable, metadata_readable, - NULL); - - for (i = 0; i < priv->child_count; i++) { - if (data_readable[i]) - datamap |= (1 << i); - if (metadata_readable[i]) - metadatamap |= (1 << i); - } - - val = ((uint64_t) metadatamap) | - (((uint64_t) datamap) << 16) | - (((uint64_t) event) << 32); - - LOCK(&local->inode->lock); - { - if (local->inode_ctx->write_subvol == 0 && - local->transaction.type == AFR_DATA_TRANSACTION) { - local->inode_ctx->write_subvol = val; - } - } - UNLOCK (&local->inode->lock); - - return 0; +afr_write_subvol_set(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + unsigned char *data_accused = NULL; + unsigned char *metadata_accused = NULL; + unsigned char *data_readable = NULL; + unsigned char *metadata_readable = NULL; + uint16_t datamap = 0; + uint16_t metadatamap = 0; + uint64_t val = 0; + int event = 0; + int i = 0; + + local = frame->local; + priv = this->private; + data_accused = alloca0(priv->child_count); + metadata_accused = alloca0(priv->child_count); + data_readable = alloca0(priv->child_count); + metadata_readable = alloca0(priv->child_count); + event = local->event_generation; + + afr_readables_fill(frame, this, local->inode, data_accused, + metadata_accused, data_readable, metadata_readable, + NULL); + + for (i = 0; i < priv->child_count; i++) { + if (data_readable[i]) + datamap |= (1 << i); + if (metadata_readable[i]) + metadatamap |= (1 << i); + } + + val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | + (((uint64_t)event) << 32); + + LOCK(&local->inode->lock); + { + if (local->inode_ctx->write_subvol == 0 && + local->transaction.type == AFR_DATA_TRANSACTION) { + local->inode_ctx->write_subvol = val; + } + } + UNLOCK(&local->inode->lock); + + return 0; } int -afr_write_subvol_reset (call_frame_t *frame, xlator_t *this) +afr_write_subvol_reset(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; - LOCK(&local->inode->lock); - { - GF_ASSERT (local->inode_ctx->lock_count > 0); - local->inode_ctx->lock_count--; + local = frame->local; + LOCK(&local->inode->lock); + { + GF_ASSERT(local->inode_ctx->lock_count > 0); + local->inode_ctx->lock_count--; - if (!local->inode_ctx->lock_count) - local->inode_ctx->write_subvol = 0; - } - UNLOCK(&local->inode->lock); + if (!local->inode_ctx->lock_count) + local->inode_ctx->write_subvol = 0; + } + UNLOCK(&local->inode->lock); - return 0; + return 0; } 
int -afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode) +afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode) { - int ret = 0; + int ret = 0; - local->inode = inode_ref (inode); - LOCK(&local->inode->lock); - { - ret = __afr_inode_ctx_get (this, local->inode, - &local->inode_ctx); - } - UNLOCK (&local->inode->lock); - if (ret < 0) { - gf_msg_callingfn (this->name, GF_LOG_ERROR, ENOMEM, - AFR_MSG_INODE_CTX_GET_FAILED, - "Error getting inode ctx %s", - uuid_utoa (local->inode->gfid)); - } - return ret; + local->inode = inode_ref(inode); + LOCK(&local->inode->lock); + { + ret = __afr_inode_ctx_get(this, local->inode, &local->inode_ctx); + } + UNLOCK(&local->inode->lock); + if (ret < 0) { + gf_msg_callingfn( + this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_INODE_CTX_GET_FAILED, + "Error getting inode ctx %s", uuid_utoa(local->inode->gfid)); + } + return ret; } gf_boolean_t -afr_ta_is_fop_called_from_synctask (xlator_t *this) +afr_ta_is_fop_called_from_synctask(xlator_t *this) { - struct synctask *task = NULL; - gf_lkowner_t tmp_owner = {0,}; + struct synctask *task = NULL; + gf_lkowner_t tmp_owner = { + 0, + }; - task = synctask_get (); - if (!task) - return _gf_false; + task = synctask_get(); + if (!task) + return _gf_false; - set_lk_owner_from_ptr(&tmp_owner, (void *)this); + set_lk_owner_from_ptr(&tmp_owner, (void *)this); - if (!is_same_lkowner (&tmp_owner, &task->frame->root->lk_owner)) - return _gf_false; + if (!is_same_lkowner(&tmp_owner, &task->frame->root->lk_owner)) + return _gf_false; - return _gf_true; + return _gf_true; } int -afr_ta_post_op_lock (xlator_t *this, loc_t *loc) -{ - /*Note: At any given time, only one instance of this function must - * be in progress.*/ - - int ret = 0; - uuid_t gfid = {0,}; - afr_private_t *priv = this->private; - gf_boolean_t locked = _gf_false; - struct gf_flock flock1 = {0, }; - struct gf_flock flock2 = {0, }; - int32_t cmd = 0; - - GF_ASSERT (afr_ta_is_fop_called_from_synctask (this)); - flock1.l_type = F_WRLCK; - - while (!locked) { - if (priv->shd.iamshd) { - cmd = F_SETLKW; - flock1.l_start = 0; - flock1.l_len = 0; - - } else { - cmd = F_SETLK; - if (priv->ta_notify_dom_lock_offset) { - flock1.l_start = - priv->ta_notify_dom_lock_offset; - } else { - gf_uuid_generate (gfid); - flock1.l_start = gfid_to_ino (gfid); - if (flock1.l_start < 0) - flock1.l_start = -flock1.l_start; - } - flock1.l_len = 1; - } - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, loc, cmd, &flock1, - NULL, NULL); - if (!ret) { - locked = _gf_true; - priv->ta_notify_dom_lock_offset = flock1.l_start; - } else if (ret == -EAGAIN) { - continue; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - AFR_MSG_THIN_ARB, "Failed to get " - "AFR_TA_DOM_NOTIFY lock on %s.", loc->name); - goto out; - } - } +afr_ta_post_op_lock(xlator_t *this, loc_t *loc) +{ + /*Note: At any given time, only one instance of this function must + * be in progress.*/ + + int ret = 0; + uuid_t gfid = { + 0, + }; + afr_private_t *priv = this->private; + gf_boolean_t locked = _gf_false; + struct gf_flock flock1 = { + 0, + }; + struct gf_flock flock2 = { + 0, + }; + int32_t cmd = 0; + + GF_ASSERT(afr_ta_is_fop_called_from_synctask(this)); + flock1.l_type = F_WRLCK; + + while (!locked) { + if (priv->shd.iamshd) { + cmd = F_SETLKW; + flock1.l_start = 0; + flock1.l_len = 0; - flock2.l_type = F_WRLCK; - flock2.l_start = 0; - flock2.l_len = 0; - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_MODIFY, loc, 
F_SETLKW, &flock2, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to get AFR_TA_DOM_MODIFY lock."); - flock1.l_type = F_UNLCK; - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1, - NULL, NULL); + } else { + cmd = F_SETLK; + if (priv->ta_notify_dom_lock_offset) { + flock1.l_start = priv->ta_notify_dom_lock_offset; + } else { + gf_uuid_generate(gfid); + flock1.l_start = gfid_to_ino(gfid); + if (flock1.l_start < 0) + flock1.l_start = -flock1.l_start; + } + flock1.l_len = 1; } + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_NOTIFY, loc, cmd, &flock1, NULL, NULL); + if (!ret) { + locked = _gf_true; + priv->ta_notify_dom_lock_offset = flock1.l_start; + } else if (ret == -EAGAIN) { + continue; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to get " + "AFR_TA_DOM_NOTIFY lock on %s.", + loc->name); + goto out; + } + } + + flock2.l_type = F_WRLCK; + flock2.l_start = 0; + flock2.l_len = 0; + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_MODIFY, loc, F_SETLKW, &flock2, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to get AFR_TA_DOM_MODIFY lock."); + flock1.l_type = F_UNLCK; + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1, NULL, + NULL); + } out: - return ret; + return ret; } int -afr_ta_post_op_unlock (xlator_t *this, loc_t *loc) -{ - afr_private_t *priv = this->private; - struct gf_flock flock = {0, }; - int ret = 0; - - GF_ASSERT (afr_ta_is_fop_called_from_synctask (this)); - flock.l_type = F_UNLCK; - flock.l_start = 0; - flock.l_len = 0; - - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_MODIFY, loc, F_SETLK, &flock, NULL, - NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to unlock AFR_TA_DOM_MODIFY lock."); - goto out; - } - - if (!priv->shd.iamshd) - /* Mounts (clients) will not release the AFR_TA_DOM_NOTIFY lock - * in post-op as they use it as a notification mechanism. When - * shd sends a lock request on TA during heal, the clients will - * receive a lock-contention upcall notification upon which they - * will release the AFR_TA_DOM_NOTIFY lock after completing the - * in flight I/O.*/ - goto out; - - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to unlock AFR_TA_DOM_NOTIFY lock."); - } +afr_ta_post_op_unlock(xlator_t *this, loc_t *loc) +{ + afr_private_t *priv = this->private; + struct gf_flock flock = { + 0, + }; + int ret = 0; + + GF_ASSERT(afr_ta_is_fop_called_from_synctask(this)); + flock.l_type = F_UNLCK; + flock.l_start = 0; + flock.l_len = 0; + + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_MODIFY, loc, F_SETLK, &flock, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to unlock AFR_TA_DOM_MODIFY lock."); + goto out; + } + + if (!priv->shd.iamshd) + /* Mounts (clients) will not release the AFR_TA_DOM_NOTIFY lock + * in post-op as they use it as a notification mechanism. 
When + * shd sends a lock request on TA during heal, the clients will + * receive a lock-contention upcall notification upon which they + * will release the AFR_TA_DOM_NOTIFY lock after completing the + * in flight I/O.*/ + goto out; + + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to unlock AFR_TA_DOM_NOTIFY lock."); + } out: - return ret; + return ret; } call_frame_t * -afr_ta_frame_create (xlator_t *this) +afr_ta_frame_create(xlator_t *this) { - call_frame_t *frame = NULL; - void *lk_owner = NULL; + call_frame_t *frame = NULL; + void *lk_owner = NULL; - frame = create_frame (this, this->ctx->pool); - if (!frame) - return NULL; - lk_owner = (void *)this; - afr_set_lk_owner (frame, this, lk_owner); - return frame; + frame = create_frame(this, this->ctx->pool); + if (!frame) + return NULL; + lk_owner = (void *)this; + afr_set_lk_owner(frame, this, lk_owner); + return frame; } diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 76d0cdfa826..4c40e85f393 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include #include #include @@ -33,327 +32,309 @@ #include "afr.h" #include "afr-transaction.h" - int32_t -afr_opendir_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - fd_t *fd, dict_t *xdata) +afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; - int32_t child_index = 0; - afr_fd_ctx_t *fd_ctx = NULL; - - local = frame->local; - fd_ctx = local->fd_ctx; - child_index = (long) cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; - } else { - local->op_ret = op_ret; - fd_ctx->opened_on[child_index] = AFR_FD_OPENED; - if (!local->xdata_rsp && xdata) - local->xdata_rsp = dict_ref (xdata); - } + afr_local_t *local = NULL; + int call_count = -1; + int32_t child_index = 0; + afr_fd_ctx_t *fd_ctx = NULL; + + local = frame->local; + fd_ctx = local->fd_ctx; + child_index = (long)cookie; + + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; + } else { + local->op_ret = op_ret; + fd_ctx->opened_on[child_index] = AFR_FD_OPENED; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref(xdata); } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - call_count = afr_frame_return (frame); + call_count = afr_frame_return(frame); - if (call_count == 0) - AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd, NULL); - return 0; + if (call_count == 0) + AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno, + local->fd, NULL); + return 0; } - int -afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - int i = 0; - int call_count = -1; - int32_t op_errno = ENOMEM; - afr_fd_ctx_t *fd_ctx = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; + int call_count = -1; + int32_t op_errno = ENOMEM; + afr_fd_ctx_t *fd_ctx = NULL; - priv = 
this->private; + priv = this->private; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_OPENDIR; - if (!afr_is_consistent_io_possible (local, priv, &op_errno)) - goto out; + local->op = GF_FOP_OPENDIR; + if (!afr_is_consistent_io_possible(local, priv, &op_errno)) + goto out; - fd_ctx = afr_fd_ctx_get (fd, this); - if (!fd_ctx) - goto out; + fd_ctx = afr_fd_ctx_get(fd, this); + if (!fd_ctx) + goto out; - loc_copy (&local->loc, loc); + loc_copy(&local->loc, loc); - local->fd = fd_ref (fd); - local->fd_ctx = fd_ctx; + local->fd = fd_ref(fd); + local->fd_ctx = fd_ctx; - call_count = local->call_count; + call_count = local->call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_opendir_cbk, - (void*) (long) i, - priv->children[i], - priv->children[i]->fops->opendir, - loc, fd, NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE(frame, afr_opendir_cbk, (void *)(long)i, + priv->children[i], + priv->children[i]->fops->opendir, loc, fd, NULL); - if (!--call_count) - break; - } + if (!--call_count) + break; } + } - return 0; + return 0; out: - AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL); - return 0; + AFR_STACK_UNWIND(opendir, frame, -1, op_errno, fd, NULL); + return 0; } static int -afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol) +afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) { - int gen = 0; - int entry_read_subvol = 0; - unsigned char *data_readable = NULL; - unsigned char *metadata_readable = NULL; - afr_private_t *priv = NULL; - - priv = this->private; - data_readable = alloca0 (priv->child_count); - metadata_readable = alloca0 (priv->child_count); - - afr_inode_read_subvol_get (inode, this, data_readable, - metadata_readable, &gen); - - if (gen != priv->event_generation || - !data_readable[par_read_subvol] || - !metadata_readable[par_read_subvol]) - return -1; - - /* Once the control reaches the following statement, it means that the - * parent's read subvol is perfectly readable. So calling - * either afr_data_subvol_get() or afr_metadata_subvol_get() would - * yield the same result. Hence, choosing afr_data_subvol_get() below. - */ - - if (!priv->consistent_metadata) - return 0; - - /* For an inode fetched through readdirp which is yet to be linked, - * inode ctx would not be initialised (yet). So this function returns - * -1 above due to gen being 0, which is why it is OK to pass NULL for - * read_subvol_args here. - */ - entry_read_subvol = afr_data_subvol_get (inode, this, NULL, NULL, - NULL, NULL); - if (entry_read_subvol != par_read_subvol) - return -1; - + int gen = 0; + int entry_read_subvol = 0; + unsigned char *data_readable = NULL; + unsigned char *metadata_readable = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + data_readable = alloca0(priv->child_count); + metadata_readable = alloca0(priv->child_count); + + afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable, + &gen); + + if (gen != priv->event_generation || !data_readable[par_read_subvol] || + !metadata_readable[par_read_subvol]) + return -1; + + /* Once the control reaches the following statement, it means that the + * parent's read subvol is perfectly readable. So calling + * either afr_data_subvol_get() or afr_metadata_subvol_get() would + * yield the same result. 
Hence, choosing afr_data_subvol_get() below. + */ + + if (!priv->consistent_metadata) return 0; + /* For an inode fetched through readdirp which is yet to be linked, + * inode ctx would not be initialised (yet). So this function returns + * -1 above due to gen being 0, which is why it is OK to pass NULL for + * read_subvol_args here. + */ + entry_read_subvol = afr_data_subvol_get(inode, this, NULL, NULL, NULL, + NULL); + if (entry_read_subvol != par_read_subvol) + return -1; + + return 0; } static void -afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, + gf_dirent_t *entries, fd_t *fd) { - int ret = -1; - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - xlator_t *this = NULL; - afr_private_t *priv = NULL; - gf_boolean_t need_heal = _gf_false; - gf_boolean_t validate_subvol = _gf_false; - - this = THIS; - priv = this->private; - - need_heal = afr_get_need_heal (this); - validate_subvol = need_heal | priv->consistent_metadata; - - list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid (fd->inode->gfid) && - !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) { - continue; - } - - list_del_init (&entry->list); - list_add_tail (&entry->list, &entries->list); - - if (!validate_subvol) - continue; - - if (entry->inode) { - ret = afr_validate_read_subvol (entry->inode, this, - subvol); - if (ret == -1) { - inode_unref (entry->inode); - entry->inode = NULL; - continue; - } - } + int ret = -1; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + xlator_t *this = NULL; + afr_private_t *priv = NULL; + gf_boolean_t need_heal = _gf_false; + gf_boolean_t validate_subvol = _gf_false; + + this = THIS; + priv = this->private; + + need_heal = afr_get_need_heal(this); + validate_subvol = need_heal | priv->consistent_metadata; + + list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) + { + if (__is_root_gfid(fd->inode->gfid) && + !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + continue; } -} + list_del_init(&entry->list); + list_add_tail(&entry->list, &entries->list); + + if (!validate_subvol) + continue; + + if (entry->inode) { + ret = afr_validate_read_subvol(entry->inode, this, subvol); + if (ret == -1) { + inode_unref(entry->inode); + entry->inode = NULL; + continue; + } + } + } +} int32_t -afr_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries, - dict_t *xdata) +afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries, + dict_t *xdata) { - afr_local_t *local = NULL; - gf_dirent_t entries; + afr_local_t *local = NULL; + gf_dirent_t entries; - INIT_LIST_HEAD (&entries.list); + INIT_LIST_HEAD(&entries.list); - local = frame->local; + local = frame->local; - if (op_ret < 0 && !local->cont.readdir.offset) { - /* failover only if this was first readdir, detected - by offset == 0 */ - local->op_ret = op_ret; - local->op_errno = op_errno; + if (op_ret < 0 && !local->cont.readdir.offset) { + /* failover only if this was first readdir, detected + by offset == 0 */ + local->op_ret = op_ret; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - if (op_ret >= 0) - afr_readdir_transform_entries (subvol_entries, (long) cookie, - &entries, local->fd); + if (op_ret >= 0) + 
afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, + local->fd); - AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, xdata); + AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } - int -afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_readdir_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - - priv = this->private; - local = frame->local; - fd_ctx = afr_fd_ctx_get (local->fd, this); - if (!fd_ctx) { - local->op_errno = EINVAL; - local->op_ret = -1; - } - - if (subvol == -1 || !fd_ctx) { - AFR_STACK_UNWIND (readdir, frame, local->op_ret, - local->op_errno, 0, 0); - return 0; - } - - fd_ctx->readdir_subvol = subvol; - - if (local->op == GF_FOP_READDIR) - STACK_WIND_COOKIE (frame, afr_readdir_cbk, - (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->readdir, - local->fd, local->cont.readdir.size, - local->cont.readdir.offset, - local->xdata_req); - else - STACK_WIND_COOKIE (frame, afr_readdir_cbk, - (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->readdirp, - local->fd, local->cont.readdir.size, - local->cont.readdir.offset, - local->xdata_req); - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + + priv = this->private; + local = frame->local; + fd_ctx = afr_fd_ctx_get(local->fd, this); + if (!fd_ctx) { + local->op_errno = EINVAL; + local->op_ret = -1; + } + + if (subvol == -1 || !fd_ctx) { + AFR_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, 0, 0); + return 0; + } + + fd_ctx->readdir_subvol = subvol; + + if (local->op == GF_FOP_READDIR) + STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->readdir, local->fd, + local->cont.readdir.size, local->cont.readdir.offset, + local->xdata_req); + else + STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->readdirp, local->fd, + local->cont.readdir.size, local->cont.readdir.offset, + local->xdata_req); + return 0; } - int -afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, int whichop, dict_t *dict) +afr_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, int whichop, dict_t *dict) { - afr_local_t *local = NULL; - int32_t op_errno = 0; - int subvol = -1; - afr_fd_ctx_t *fd_ctx = NULL; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - fd_ctx = afr_fd_ctx_get (fd, this); - if (!fd_ctx) { - op_errno = EINVAL; - goto out; - } - - local->op = whichop; - local->fd = fd_ref (fd); - local->cont.readdir.size = size; - local->cont.readdir.offset = offset; - local->xdata_req = (dict)? 
dict_ref (dict) : NULL; - - subvol = fd_ctx->readdir_subvol; - - if (offset == 0 || subvol == -1) { - /* First readdir has option of failing over and selecting - an appropriate read subvolume */ - afr_read_txn (frame, this, fd->inode, afr_readdir_wind, - AFR_DATA_TRANSACTION); - } else { - /* But continued readdirs MUST stick to the same subvolume - without an option to failover */ - afr_readdir_wind (frame, this, subvol); - } - - return 0; + afr_local_t *local = NULL; + int32_t op_errno = 0; + int subvol = -1; + afr_fd_ctx_t *fd_ctx = NULL; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + fd_ctx = afr_fd_ctx_get(fd, this); + if (!fd_ctx) { + op_errno = EINVAL; + goto out; + } + + local->op = whichop; + local->fd = fd_ref(fd); + local->cont.readdir.size = size; + local->cont.readdir.offset = offset; + local->xdata_req = (dict) ? dict_ref(dict) : NULL; + + subvol = fd_ctx->readdir_subvol; + + if (offset == 0 || subvol == -1) { + /* First readdir has option of failing over and selecting + an appropriate read subvolume */ + afr_read_txn(frame, this, fd->inode, afr_readdir_wind, + AFR_DATA_TRANSACTION); + } else { + /* But continued readdirs MUST stick to the same subvolume + without an option to failover */ + afr_readdir_wind(frame, this, subvol); + } + + return 0; out: - AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); - return 0; + AFR_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); + return 0; } - int32_t -afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) +afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) { - afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata); + afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); - return 0; + return 0; } - int32_t -afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *dict) +afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) { - afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict); + afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIRP, dict); - return 0; + return 0; } - int32_t -afr_releasedir (xlator_t *this, fd_t *fd) +afr_releasedir(xlator_t *this, fd_t *fd) { - afr_cleanup_fd_ctx (this, fd); + afr_cleanup_fd_ctx(this, fd); - return 0; + return 0; } diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 0e3ae18b46a..5725b1c5cb3 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. 
*/ - #include #include #include @@ -34,501 +33,470 @@ #include "afr-transaction.h" void -afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this); +afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this); int -afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno) +afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno) { - int ret = -1; - char *child_path = NULL; - - if (!child->parent) { - if (op_errno) - *op_errno = EINVAL; - goto out; - } - - child_path = gf_strdup (child->path); - if (!child_path) { - if (op_errno) - *op_errno = ENOMEM; - goto out; - } - - parent->path = gf_strdup (dirname (child_path)); - if (!parent->path) { - if (op_errno) - *op_errno = ENOMEM; - goto out; - } - - parent->inode = inode_ref (child->parent); - gf_uuid_copy (parent->gfid, child->pargfid); - - ret = 0; + int ret = -1; + char *child_path = NULL; + + if (!child->parent) { + if (op_errno) + *op_errno = EINVAL; + goto out; + } + + child_path = gf_strdup(child->path); + if (!child_path) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } + + parent->path = gf_strdup(dirname(child_path)); + if (!parent->path) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } + + parent->inode = inode_ref(child->parent); + gf_uuid_copy(parent->gfid, child->pargfid); + + ret = 0; out: - GF_FREE (child_path); + GF_FREE(child_path); - return ret; + return ret; } - static void -__afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) +__afr_dir_write_finalize(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int inode_read_subvol = -1; - int parent_read_subvol = -1; - int parent2_read_subvol = -1; - int i = 0; - afr_read_subvol_args_t args = {0,}; - - local = frame->local; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret == -1) - continue; - gf_uuid_copy (args.gfid, local->replies[i].poststat.ia_gfid); - args.ia_type = local->replies[i].poststat.ia_type; - break; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int inode_read_subvol = -1; + int parent_read_subvol = -1; + int parent2_read_subvol = -1; + int i = 0; + afr_read_subvol_args_t args = { + 0, + }; + + local = frame->local; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret == -1) + continue; + gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid); + args.ia_type = local->replies[i].poststat.ia_type; + break; + } + + if (local->inode) { + afr_replies_interpret(frame, this, local->inode, NULL); + inode_read_subvol = afr_data_subvol_get(local->inode, this, NULL, NULL, + NULL, &args); + } + + if (local->parent) + parent_read_subvol = afr_data_subvol_get(local->parent, this, NULL, + local->readable, NULL, NULL); + + if (local->parent2) + parent2_read_subvol = afr_data_subvol_get(local->parent2, this, NULL, + local->readable2, NULL, NULL); + + local->op_ret = -1; + local->op_errno = afr_final_errno(local, priv); + afr_pick_error_xdata(local, priv, local->parent, local->readable, + local->parent2, local->readable2); + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret < 0) { + if (local->inode) + afr_inode_event_gen_reset(local->inode, this); + if (local->parent) + afr_inode_event_gen_reset(local->parent, this); + if (local->parent2) + afr_inode_event_gen_reset(local->parent2, this); + 
continue; } - if (local->inode) { - afr_replies_interpret (frame, this, local->inode, NULL); - inode_read_subvol = afr_data_subvol_get (local->inode, this, - NULL, NULL, NULL, &args); - } - - if (local->parent) - parent_read_subvol = afr_data_subvol_get (local->parent, this, - NULL, local->readable, NULL, NULL); - - if (local->parent2) - parent2_read_subvol = afr_data_subvol_get (local->parent2, this, - NULL, local->readable2, NULL, NULL); - - local->op_ret = -1; - local->op_errno = afr_final_errno (local, priv); - afr_pick_error_xdata (local, priv, local->parent, local->readable, - local->parent2, local->readable2); - - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret < 0) { - if (local->inode) - afr_inode_event_gen_reset (local->inode, this); - if (local->parent) - afr_inode_event_gen_reset (local->parent, - this); - if (local->parent2) - afr_inode_event_gen_reset (local->parent2, - this); - continue; - } - - if (local->op_ret == -1) { - local->op_ret = local->replies[i].op_ret; - local->op_errno = local->replies[i].op_errno; - - local->cont.dir_fop.buf = - local->replies[i].poststat; - local->cont.dir_fop.preparent = - local->replies[i].preparent; - local->cont.dir_fop.postparent = - local->replies[i].postparent; - local->cont.dir_fop.prenewparent = - local->replies[i].preparent2; - local->cont.dir_fop.postnewparent = - local->replies[i].postparent2; - if (local->xdata_rsp) { - dict_unref (local->xdata_rsp); - local->xdata_rsp = NULL; - } - - if (local->replies[i].xdata) - local->xdata_rsp = - dict_ref (local->replies[i].xdata); - continue; - } - - if (i == inode_read_subvol) { - local->cont.dir_fop.buf = - local->replies[i].poststat; - if (local->replies[i].xdata) { - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); - local->xdata_rsp = - dict_ref (local->replies[i].xdata); - } - } - - if (i == parent_read_subvol) { - local->cont.dir_fop.preparent = - local->replies[i].preparent; - local->cont.dir_fop.postparent = - local->replies[i].postparent; - } - - if (i == parent2_read_subvol) { - local->cont.dir_fop.prenewparent = - local->replies[i].preparent2; - local->cont.dir_fop.postnewparent = - local->replies[i].postparent2; - } - } + if (local->op_ret == -1) { + local->op_ret = local->replies[i].op_ret; + local->op_errno = local->replies[i].op_errno; + + local->cont.dir_fop.buf = local->replies[i].poststat; + local->cont.dir_fop.preparent = local->replies[i].preparent; + local->cont.dir_fop.postparent = local->replies[i].postparent; + local->cont.dir_fop.prenewparent = local->replies[i].preparent2; + local->cont.dir_fop.postnewparent = local->replies[i].postparent2; + if (local->xdata_rsp) { + dict_unref(local->xdata_rsp); + local->xdata_rsp = NULL; + } + + if (local->replies[i].xdata) + local->xdata_rsp = dict_ref(local->replies[i].xdata); + continue; + } -} + if (i == inode_read_subvol) { + local->cont.dir_fop.buf = local->replies[i].poststat; + if (local->replies[i].xdata) { + if (local->xdata_rsp) + dict_unref(local->xdata_rsp); + local->xdata_rsp = dict_ref(local->replies[i].xdata); + } + } + if (i == parent_read_subvol) { + local->cont.dir_fop.preparent = local->replies[i].preparent; + local->cont.dir_fop.postparent = local->replies[i].postparent; + } + + if (i == parent2_read_subvol) { + local->cont.dir_fop.prenewparent = local->replies[i].preparent2; + local->cont.dir_fop.postnewparent = local->replies[i].postparent2; + } + } +} static void -__afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int 
child_index, - int op_ret, int op_errno, struct iatt *poststat, - struct iatt *preparent, struct iatt *postparent, - struct iatt *preparent2, struct iatt *postparent2, - dict_t *xdata) +__afr_dir_write_fill(call_frame_t *frame, xlator_t *this, int child_index, + int op_ret, int op_errno, struct iatt *poststat, + struct iatt *preparent, struct iatt *postparent, + struct iatt *preparent2, struct iatt *postparent2, + dict_t *xdata) { - afr_local_t *local = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - - local = frame->local; - fd_ctx = local->fd_ctx; - - local->replies[child_index].valid = 1; - local->replies[child_index].op_ret = op_ret; - local->replies[child_index].op_errno = op_errno; - if (xdata) - local->replies[child_index].xdata = dict_ref (xdata); - - - if (op_ret >= 0) { - if (poststat) - local->replies[child_index].poststat = *poststat; - if (preparent) - local->replies[child_index].preparent = *preparent; - if (postparent) - local->replies[child_index].postparent = *postparent; - if (preparent2) - local->replies[child_index].preparent2 = *preparent2; - if (postparent2) - local->replies[child_index].postparent2 = *postparent2; - if (fd_ctx) - fd_ctx->opened_on[child_index] = AFR_FD_OPENED; - } else { - if (op_errno != ENOTEMPTY) - afr_transaction_fop_failed (frame, this, child_index); - if (fd_ctx) - fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; - } - - return; + afr_local_t *local = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + + local = frame->local; + fd_ctx = local->fd_ctx; + + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + if (xdata) + local->replies[child_index].xdata = dict_ref(xdata); + + if (op_ret >= 0) { + if (poststat) + local->replies[child_index].poststat = *poststat; + if (preparent) + local->replies[child_index].preparent = *preparent; + if (postparent) + local->replies[child_index].postparent = *postparent; + if (preparent2) + local->replies[child_index].preparent2 = *preparent2; + if (postparent2) + local->replies[child_index].postparent2 = *postparent2; + if (fd_ctx) + fd_ctx->opened_on[child_index] = AFR_FD_OPENED; + } else { + if (op_errno != ENOTEMPTY) + afr_transaction_fop_failed(frame, this, child_index); + if (fd_ctx) + fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; + } + + return; } - static int -__afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, - struct iatt *preparent2, struct iatt *postparent2, - dict_t *xdata) +__afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + struct iatt *preparent2, struct iatt *postparent2, + dict_t *xdata) { - afr_local_t *local = NULL; - int child_index = (long) cookie; - int call_count = -1; - afr_private_t *priv = NULL; - - priv = this->private; - local = frame->local; - - LOCK (&frame->lock); - { - __afr_dir_write_fill (frame, this, child_index, op_ret, - op_errno, buf, preparent, postparent, - preparent2, postparent2, xdata); - } - UNLOCK (&frame->lock); - call_count = afr_frame_return (frame); - - if (call_count == 0) { - __afr_dir_write_finalize (frame, this); - - if (afr_txn_nothing_failed (frame, this)) { - /*if it did pre-op, it will do post-op changing ctime*/ - if (priv->consistent_metadata && - afr_needs_changelog_update (local)) - afr_zero_fill_stat (local); - local->transaction.unwind (frame, 
this); - } - - afr_mark_entry_pending_changelog (frame, this); - - afr_transaction_resume (frame, this); + afr_local_t *local = NULL; + int child_index = (long)cookie; + int call_count = -1; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + + LOCK(&frame->lock); + { + __afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf, + preparent, postparent, preparent2, postparent2, + xdata); + } + UNLOCK(&frame->lock); + call_count = afr_frame_return(frame); + + if (call_count == 0) { + __afr_dir_write_finalize(frame, this); + + if (afr_txn_nothing_failed(frame, this)) { + /*if it did pre-op, it will do post-op changing ctime*/ + if (priv->consistent_metadata && afr_needs_changelog_update(local)) + afr_zero_fill_stat(local); + local->transaction.unwind(frame, this); } - return 0; -} + afr_mark_entry_pending_changelog(frame, this); + afr_transaction_resume(frame, this); + } + + return 0; +} int -afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - dict_t *xattr, dict_t *xdata) +afr_mark_new_entry_changelog_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) { - int call_count = 0; + int call_count = 0; - call_count = afr_frame_return (frame); + call_count = afr_frame_return(frame); - if (call_count == 0) - AFR_STACK_DESTROY (frame); + if (call_count == 0) + AFR_STACK_DESTROY(frame); - return 0; + return 0; } - void -afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this) +afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this) { - call_frame_t *new_frame = NULL; - afr_local_t *local = NULL; - afr_local_t *new_local = NULL; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - int32_t **changelog = NULL; - int i = 0; - int op_errno = ENOMEM; - unsigned char *pending = NULL; - int call_count = 0; - - local = frame->local; - priv = this->private; - - new_frame = copy_frame (frame); - if (!new_frame) - goto out; - - new_local = AFR_FRAME_INIT (new_frame, op_errno); - if (!new_local) - goto out; - - xattr = dict_new (); - if (!xattr) - goto out; - - pending = alloca0 (priv->child_count); - - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i] && - !local->transaction.failed_subvols[i]) { - call_count ++; - continue; - } - pending[i] = 1; - } - - changelog = afr_mark_pending_changelog (priv, pending, xattr, - local->cont.dir_fop.buf.ia_type); - if (!changelog) - goto out; - - new_local->pending = changelog; - gf_uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid); - new_local->loc.inode = inode_ref (local->inode); - - new_local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - if (pending[i]) - continue; - - STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->xattrop, - &new_local->loc, GF_XATTROP_ADD_ARRAY, - xattr, NULL); - if (!--call_count) - break; + call_frame_t *new_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *new_local = NULL; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + int32_t **changelog = NULL; + int i = 0; + int op_errno = ENOMEM; + unsigned char *pending = NULL; + int call_count = 0; + + local = frame->local; + priv = this->private; + + new_frame = copy_frame(frame); + if (!new_frame) + goto out; + + new_local = AFR_FRAME_INIT(new_frame, op_errno); + if (!new_local) + goto out; + + xattr = dict_new(); + if (!xattr) + goto out; + + pending = 
alloca0(priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] && + !local->transaction.failed_subvols[i]) { + call_count++; + continue; } + pending[i] = 1; + } + + changelog = afr_mark_pending_changelog(priv, pending, xattr, + local->cont.dir_fop.buf.ia_type); + if (!changelog) + goto out; + + new_local->pending = changelog; + gf_uuid_copy(new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid); + new_local->loc.inode = inode_ref(local->inode); + + new_local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (pending[i]) + continue; - new_frame = NULL; + STACK_WIND_COOKIE(new_frame, afr_mark_new_entry_changelog_cbk, + (void *)(long)i, priv->children[i], + priv->children[i]->fops->xattrop, &new_local->loc, + GF_XATTROP_ADD_ARRAY, xattr, NULL); + if (!--call_count) + break; + } + + new_frame = NULL; out: - if (new_frame) - AFR_STACK_DESTROY (new_frame); - if (xattr) - dict_unref (xattr); - return; + if (new_frame) + AFR_STACK_DESTROY(new_frame); + if (xattr) + dict_unref(xattr); + return; } - void -afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this) +afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int pre_op_count = 0; - int failed_count = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int pre_op_count = 0; + int failed_count = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - if (local->op_ret < 0) - return; + if (local->op_ret < 0) + return; - if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD && - local->op != GF_FOP_MKDIR) - return; + if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD && + local->op != GF_FOP_MKDIR) + return; - pre_op_count = AFR_COUNT (local->transaction.pre_op, priv->child_count); - failed_count = AFR_COUNT (local->transaction.failed_subvols, - priv->child_count); + pre_op_count = AFR_COUNT(local->transaction.pre_op, priv->child_count); + failed_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); - if (pre_op_count == priv->child_count && !failed_count) - return; + if (pre_op_count == priv->child_count && !failed_count) + return; - afr_mark_new_entry_changelog (frame, this); + afr_mark_new_entry_changelog(frame, this); - return; + return; } - /* {{{ create */ int -afr_create_unwind (call_frame_t *frame, xlator_t *this) +afr_create_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; - - local = frame->local; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); + local = frame->local; - if (!main_frame) - return 0; + main_frame = afr_transaction_detach_fop_frame(frame); - AFR_STACK_UNWIND (create, main_frame, local->op_ret, local->op_errno, - local->cont.create.fd, local->inode, - &local->cont.dir_fop.buf, - &local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, local->xdata_rsp); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(create, main_frame, local->op_ret, local->op_errno, + local->cont.create.fd, local->inode, + &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, local->xdata_rsp); + return 0; +} int -afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - fd_t *fd, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, - 
dict_t *xdata) +afr_create_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf, - preparent, postparent, NULL, NULL, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } - int -afr_create_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_create_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; - - STACK_WIND_COOKIE (frame, afr_create_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->create, - &local->loc, local->cont.create.flags, - local->cont.create.mode, local->umask, - local->cont.create.fd, local->xdata_req); - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_create_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->create, &local->loc, + local->cont.create.flags, local->cont.create.mode, + local->umask, local->cont.create.fd, local->xdata_req); + return 0; } - int -afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - loc_copy (&local->loc, loc); - - local->fd_ctx = afr_fd_ctx_get (fd, this); - if (!local->fd_ctx) - goto out; - - local->inode = inode_ref (loc->inode); - local->parent = inode_ref (loc->parent); - - local->op = GF_FOP_CREATE; - local->cont.create.flags = flags; - local->fd_ctx->flags = flags; - local->cont.create.mode = mode; - local->cont.create.fd = fd_ref (fd); - local->umask = umask; - - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); - - if (!local->xdata_req) - goto out; - - local->transaction.wind = afr_create_wind; - local->transaction.unwind = afr_create_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, loc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (loc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[0], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; - - int_lock->lockee_count++; - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if 
(!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, loc); + + local->fd_ctx = afr_fd_ctx_get(fd, this); + if (!local->fd_ctx) + goto out; + + local->inode = inode_ref(loc->inode); + local->parent = inode_ref(loc->parent); + + local->op = GF_FOP_CREATE; + local->cont.create.flags = flags; + local->fd_ctx->flags = flags; + local->cont.create.mode = mode; + local->cont.create.fd = fd_ref(fd); + local->umask = umask; + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->transaction.wind = afr_create_wind; + local->transaction.unwind = afr_create_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(loc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = 0; + ret = afr_init_entry_lockee(&int_lock->lockee[0], local, + &local->transaction.parent_loc, + local->transaction.basename, priv->child_count); + if (ret) + goto out; + + int_lock->lockee_count++; + ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; + AFR_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; } /* }}} */ @@ -536,516 +504,492 @@ out: /* {{{ mknod */ int -afr_mknod_unwind (call_frame_t *frame, xlator_t *this) +afr_mknod_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; - - AFR_STACK_UNWIND (mknod, main_frame, local->op_ret, local->op_errno, - local->inode, &local->cont.dir_fop.buf, - &local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(mknod, main_frame, local->op_ret, local->op_errno, + local->inode, &local->cont.dir_fop.buf, + &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, local->xdata_rsp); + return 0; +} int -afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +afr_mknod_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf, - preparent, postparent, NULL, NULL, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } - int -afr_mknod_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_mknod_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; - - 
STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->mknod, - &local->loc, local->cont.mknod.mode, - local->cont.mknod.dev, local->umask, - local->xdata_req); - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_mknod_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->mknod, &local->loc, + local->cont.mknod.mode, local->cont.mknod.dev, + local->umask, local->xdata_req); + return 0; } int -afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t dev, mode_t umask, dict_t *xdata) +afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t dev, mode_t umask, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - loc_copy (&local->loc, loc); - local->inode = inode_ref (loc->inode); - local->parent = inode_ref (loc->parent); - - local->op = GF_FOP_MKNOD; - local->cont.mknod.mode = mode; - local->cont.mknod.dev = dev; - local->umask = umask; - - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); - - if (!local->xdata_req) - goto out; - - local->transaction.wind = afr_mknod_wind; - local->transaction.unwind = afr_mknod_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, loc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (loc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[0], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; - - int_lock->lockee_count++; - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, loc); + local->inode = inode_ref(loc->inode); + local->parent = inode_ref(loc->parent); + + local->op = GF_FOP_MKNOD; + local->cont.mknod.mode = mode; + local->cont.mknod.dev = dev; + local->umask = umask; + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->transaction.wind = afr_mknod_wind; + local->transaction.unwind = afr_mknod_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(loc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = 0; + ret = afr_init_entry_lockee(&int_lock->lockee[0], local, + &local->transaction.parent_loc, + local->transaction.basename, 
priv->child_count); + if (ret) + goto out; + + int_lock->lockee_count++; + ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; + AFR_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; } /* }}} */ /* {{{ mkdir */ - int -afr_mkdir_unwind (call_frame_t *frame, xlator_t *this) +afr_mkdir_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; - - AFR_STACK_UNWIND (mkdir, main_frame, local->op_ret, local->op_errno, - local->inode, &local->cont.dir_fop.buf, - &local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(mkdir, main_frame, local->op_ret, local->op_errno, + local->inode, &local->cont.dir_fop.buf, + &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, local->xdata_rsp); + return 0; +} int -afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +afr_mkdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf, - preparent, postparent, NULL, NULL, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } - int -afr_mkdir_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_mkdir_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->mkdir, &local->loc, - local->cont.mkdir.mode, local->umask, - local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_mkdir_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->mkdir, &local->loc, + local->cont.mkdir.mode, local->umask, local->xdata_req); + return 0; } - int -afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) +afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - loc_copy (&local->loc, loc); - local->inode = 
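/*
 * Illustration only -- not part of this change and not AFR code.
 * afr_create(), afr_mknod() and afr_mkdir() in this file all follow one
 * shape: allocate a transaction frame, initialise the local state, take
 * references, set the wind/unwind callbacks and hand off to
 * afr_transaction(), with every setup failure jumping to a single "out:"
 * label that releases whatever was acquired.  The standalone sketch below
 * shows only that goto-based cleanup idiom; setup_ctx() and ctx_t are
 * invented names.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    char *path;  /* stands in for the loc_copy()'d state            */
    char *xdata; /* stands in for the copied/created xdata payload  */
} ctx_t;

/* Build the per-operation context; on any failure, undo partial work at
 * "out" and report a negative errno, mirroring the AFR fop entry points. */
static int
setup_ctx(const char *path, const char *xdata, ctx_t **ctx_out)
{
    int op_errno = ENOMEM;
    ctx_t *ctx = calloc(1, sizeof(*ctx));

    if (!ctx)
        goto out;

    ctx->path = strdup(path);
    if (!ctx->path)
        goto out;

    ctx->xdata = strdup(xdata ? xdata : "");
    if (!ctx->xdata)
        goto out;

    *ctx_out = ctx;
    return 0;

out:
    /* Single cleanup point, analogous to AFR_STACK_DESTROY() followed by
     * AFR_STACK_UNWIND(..., -1, op_errno, ...). */
    if (ctx) {
        free(ctx->path);
        free(ctx->xdata);
        free(ctx);
    }
    return -op_errno;
}

int
main(void)
{
    ctx_t *ctx = NULL;

    if (setup_ctx("/dir/file", NULL, &ctx) == 0) {
        printf("context ready for %s\n", ctx->path);
        free(ctx->path);
        free(ctx->xdata);
        free(ctx);
    }
    return 0;
}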
inode_ref (loc->inode); - local->parent = inode_ref (loc->parent); - - local->cont.mkdir.mode = mode; - local->umask = umask; - - if (!xdata || !dict_get (xdata, "gfid-req")) { - op_errno = EPERM; - gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno, - AFR_MSG_GFID_NULL, "mkdir: %s is received " - "without gfid-req %p", loc->path, xdata); - goto out; - } - - local->xdata_req = dict_copy_with_ref (xdata, NULL); - if (!local->xdata_req) { - op_errno = ENOMEM; - goto out; - } - - local->op = GF_FOP_MKDIR; - local->transaction.wind = afr_mkdir_wind; - local->transaction.unwind = afr_mkdir_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, loc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (loc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[0], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; - - int_lock->lockee_count++; - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, loc); + local->inode = inode_ref(loc->inode); + local->parent = inode_ref(loc->parent); + + local->cont.mkdir.mode = mode; + local->umask = umask; + + if (!xdata || !dict_get(xdata, "gfid-req")) { + op_errno = EPERM; + gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno, + AFR_MSG_GFID_NULL, + "mkdir: %s is received " + "without gfid-req %p", + loc->path, xdata); + goto out; + } + + local->xdata_req = dict_copy_with_ref(xdata, NULL); + if (!local->xdata_req) { + op_errno = ENOMEM; + goto out; + } + + local->op = GF_FOP_MKDIR; + local->transaction.wind = afr_mkdir_wind; + local->transaction.unwind = afr_mkdir_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(loc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = 0; + ret = afr_init_entry_lockee(&int_lock->lockee[0], local, + &local->transaction.parent_loc, + local->transaction.basename, priv->child_count); + if (ret) + goto out; + + int_lock->lockee_count++; + ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; + AFR_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; } /* }}} */ /* {{{ link */ - int -afr_link_unwind (call_frame_t *frame, xlator_t *this) +afr_link_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; - - local = frame->local; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; 
+ local = frame->local; - AFR_STACK_UNWIND (link, main_frame, local->op_ret, local->op_errno, - local->inode, &local->cont.dir_fop.buf, - &local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(link, main_frame, local->op_ret, local->op_errno, + local->inode, &local->cont.dir_fop.buf, + &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, local->xdata_rsp); + return 0; +} int -afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +afr_link_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf, - preparent, postparent, NULL, NULL, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } - int -afr_link_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_link_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->link, - &local->loc, &local->newloc, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_link_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->link, &local->loc, + &local->newloc, local->xdata_req); + return 0; } - int -afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) +afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - loc_copy (&local->loc, oldloc); - loc_copy (&local->newloc, newloc); - - local->inode = inode_ref (oldloc->inode); - local->parent = inode_ref (newloc->parent); - - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); - - if (!local->xdata_req) - goto out; - - local->op = GF_FOP_LINK; - - local->transaction.wind = afr_link_wind; - local->transaction.unwind = afr_link_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (newloc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[0], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; - - int_lock->lockee_count++; - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - 
return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, oldloc); + loc_copy(&local->newloc, newloc); + + local->inode = inode_ref(oldloc->inode); + local->parent = inode_ref(newloc->parent); + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->op = GF_FOP_LINK; + + local->transaction.wind = afr_link_wind; + local->transaction.unwind = afr_link_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, newloc, + &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(newloc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = 0; + ret = afr_init_entry_lockee(&int_lock->lockee[0], local, + &local->transaction.parent_loc, + local->transaction.basename, priv->child_count); + if (ret) + goto out; + + int_lock->lockee_count++; + ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; + AFR_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; } /* }}} */ /* {{{ symlink */ - int -afr_symlink_unwind (call_frame_t *frame, xlator_t *this) +afr_symlink_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; - - AFR_STACK_UNWIND (symlink, main_frame, local->op_ret, local->op_errno, - local->inode, &local->cont.dir_fop.buf, - &local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(symlink, main_frame, local->op_ret, local->op_errno, + local->inode, &local->cont.dir_fop.buf, + &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, local->xdata_rsp); + return 0; +} int -afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +afr_symlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf, - preparent, postparent, NULL, NULL, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } - int -afr_symlink_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_symlink_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = 
frame->local; - priv = this->private; - - STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->symlink, - local->cont.symlink.linkpath, &local->loc, - local->umask, local->xdata_req); - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + STACK_WIND_COOKIE(frame, afr_symlink_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->symlink, + local->cont.symlink.linkpath, &local->loc, local->umask, + local->xdata_req); + return 0; } - int -afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, mode_t umask, dict_t *xdata) +afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - loc_copy (&local->loc, loc); - local->inode = inode_ref (loc->inode); - local->parent = inode_ref (loc->parent); - - local->cont.symlink.linkpath = gf_strdup (linkpath); - local->umask = umask; - - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); - - if (!local->xdata_req) - goto out; - - local->op = GF_FOP_SYMLINK; - local->transaction.wind = afr_symlink_wind; - local->transaction.unwind = afr_symlink_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, loc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (loc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[0], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; - - int_lock->lockee_count++; - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, loc); + local->inode = inode_ref(loc->inode); + local->parent = inode_ref(loc->parent); + + local->cont.symlink.linkpath = gf_strdup(linkpath); + local->umask = umask; + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->op = GF_FOP_SYMLINK; + local->transaction.wind = afr_symlink_wind; + local->transaction.unwind = afr_symlink_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(loc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = 0; + ret = afr_init_entry_lockee(&int_lock->lockee[0], local, + &local->transaction.parent_loc, + 
local->transaction.basename, priv->child_count); + if (ret) + goto out; + + int_lock->lockee_count++; + ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); - return 0; + AFR_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } /* }}} */ @@ -1053,159 +997,152 @@ out: /* {{{ rename */ int -afr_rename_unwind (call_frame_t *frame, xlator_t *this) +afr_rename_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; - - local = frame->local; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (rename, main_frame, local->op_ret, local->op_errno, - &local->cont.dir_fop.buf, - &local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, - &local->cont.dir_fop.prenewparent, - &local->cont.dir_fop.postnewparent, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(rename, main_frame, local->op_ret, local->op_errno, + &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, + &local->cont.dir_fop.prenewparent, + &local->cont.dir_fop.postnewparent, local->xdata_rsp); + return 0; +} int -afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +afr_rename_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf, - preoldparent, postoldparent, prenewparent, - postnewparent, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf, + preoldparent, postoldparent, prenewparent, + postnewparent, xdata); } - int -afr_rename_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_rename_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_rename_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->rename, - &local->loc, &local->newloc, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_rename_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->rename, &local->loc, + &local->newloc, local->xdata_req); + return 0; } - int -afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) +afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret 
= -1; - int op_errno = ENOMEM; - int nlockee = 0; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - goto out; - } - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - loc_copy (&local->loc, oldloc); - loc_copy (&local->newloc, newloc); - - local->inode = inode_ref (oldloc->inode); - local->parent = inode_ref (oldloc->parent); - local->parent2 = inode_ref (newloc->parent); - - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); - - if (!local->xdata_req) - goto out; - - local->op = GF_FOP_RENAME; - local->transaction.wind = afr_rename_wind; - local->transaction.unwind = afr_rename_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc, - &op_errno); + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + int nlockee = 0; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) { + op_errno = ENOMEM; + goto out; + } + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, oldloc); + loc_copy(&local->newloc, newloc); + + local->inode = inode_ref(oldloc->inode); + local->parent = inode_ref(oldloc->parent); + local->parent2 = inode_ref(newloc->parent); + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->op = GF_FOP_RENAME; + local->transaction.wind = afr_rename_wind; + local->transaction.unwind = afr_rename_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, oldloc, + &op_errno); + if (ret) + goto out; + ret = afr_build_parent_loc(&local->transaction.new_parent_loc, newloc, + &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(oldloc->path); + local->transaction.new_basename = AFR_BASENAME(newloc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = nlockee = 0; + ret = afr_init_entry_lockee( + &int_lock->lockee[nlockee], local, &local->transaction.new_parent_loc, + local->transaction.new_basename, priv->child_count); + if (ret) + goto out; + + nlockee++; + ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local, + &local->transaction.parent_loc, + local->transaction.basename, priv->child_count); + if (ret) + goto out; + + nlockee++; + if (local->newloc.inode && IA_ISDIR(local->newloc.inode->ia_type)) { + ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local, + &local->newloc, NULL, priv->child_count); if (ret) - goto out; - ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (oldloc->path); - local->transaction.new_basename = AFR_BASENAME (newloc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = nlockee = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local, - &local->transaction.new_parent_loc, - local->transaction.new_basename, - priv->child_count); - if (ret) - goto out; - - nlockee++; - ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; + goto out; nlockee++; - if 
(local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) { - ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local, - &local->newloc, - NULL, - priv->child_count); - if (ret) - goto out; - - nlockee++; - } - qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee), - afr_entry_lockee_cmp); - int_lock->lockee_count = nlockee; - - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - return 0; + } + qsort(int_lock->lockee, nlockee, sizeof(*int_lock->lockee), + afr_entry_lockee_cmp); + int_lock->lockee_count = nlockee; + + ret = afr_transaction(transaction_frame, this, + AFR_ENTRY_RENAME_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; + AFR_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; } /* }}} */ @@ -1213,259 +1150,244 @@ out: /* {{{ unlink */ int -afr_unlink_unwind (call_frame_t *frame, xlator_t *this) +afr_unlink_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; - - local = frame->local; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (unlink, main_frame, local->op_ret, local->op_errno, - &local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(unlink, main_frame, local->op_ret, local->op_errno, + &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, local->xdata_rsp); + return 0; +} int -afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +afr_unlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL, - preparent, postparent, NULL, NULL, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + preparent, postparent, NULL, NULL, xdata); } - int -afr_unlink_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_unlink_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->unlink, - &local->loc, local->xflag, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_unlink_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->unlink, &local->loc, + local->xflag, local->xdata_req); + return 0; } - int -afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) +afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t 
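/*
 * Illustration only -- not part of this change and not AFR code.
 * afr_rename() above may take up to three entry locks (old parent, new
 * parent, and the target directory when it exists) and sorts them with
 * qsort() and afr_entry_lockee_cmp() so every client acquires them in the
 * same order, the usual way to avoid lock-ordering deadlocks.  The sketch
 * below shows the same qsort-with-comparator idiom on a made-up lockee_t
 * keyed by path; the real comparator may use different keys.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    const char *path; /* stand-in for the loc/basename the lock covers */
} lockee_t;

/* Total order over lockees; qsort() makes the acquisition order canonical. */
static int
lockee_cmp(const void *a, const void *b)
{
    const lockee_t *l = a;
    const lockee_t *r = b;

    return strcmp(l->path, r->path);
}

int
main(void)
{
    lockee_t lockees[] = {
        {"/new-parent"},
        {"/old-parent"},
        {"/new-parent/target-dir"},
    };
    size_t n = sizeof(lockees) / sizeof(lockees[0]);
    size_t i;

    qsort(lockees, n, sizeof(*lockees), lockee_cmp);

    for (i = 0; i < n; i++)
        printf("lock %zu: %s\n", i, lockees[i].path);
    return 0;
}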
*local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - loc_copy (&local->loc, loc); - local->xflag = xflag; - - local->inode = inode_ref (loc->inode); - local->parent = inode_ref (loc->parent); - - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); - - if (!local->xdata_req) - goto out; - - local->op = GF_FOP_UNLINK; - local->transaction.wind = afr_unlink_wind; - local->transaction.unwind = afr_unlink_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, loc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (loc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[0], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; - - int_lock->lockee_count++; - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, loc); + local->xflag = xflag; + + local->inode = inode_ref(loc->inode); + local->parent = inode_ref(loc->parent); + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->op = GF_FOP_UNLINK; + local->transaction.wind = afr_unlink_wind; + local->transaction.unwind = afr_unlink_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(loc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = 0; + ret = afr_init_entry_lockee(&int_lock->lockee[0], local, + &local->transaction.parent_loc, + local->transaction.basename, priv->child_count); + if (ret) + goto out; + + int_lock->lockee_count++; + ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } /* }}} */ /* {{{ rmdir */ - - int -afr_rmdir_unwind (call_frame_t *frame, xlator_t *this) +afr_rmdir_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *main_frame = NULL; - afr_local_t *local = NULL; - - local = frame->local; + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno, - 
&local->cont.dir_fop.preparent, - &local->cont.dir_fop.postparent, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno, + &local->cont.dir_fop.preparent, + &local->cont.dir_fop.postparent, local->xdata_rsp); + return 0; +} int -afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +afr_rmdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL, - preparent, postparent, NULL, NULL, xdata); + return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + preparent, postparent, NULL, NULL, xdata); } - int -afr_rmdir_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_rmdir_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->rmdir, - &local->loc, local->cont.rmdir.flags, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_rmdir_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->rmdir, &local->loc, + local->cont.rmdir.flags, local->xdata_req); + return 0; } - int -afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; - int nlockee = 0; - - priv = this->private; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; - - - loc_copy (&local->loc, loc); - local->inode = inode_ref (loc->inode); - local->parent = inode_ref (loc->parent); - - local->cont.rmdir.flags = flags; - - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); - - if (!local->xdata_req) - goto out; - - local->op = GF_FOP_RMDIR; - local->transaction.wind = afr_rmdir_wind; - local->transaction.unwind = afr_rmdir_unwind; - - ret = afr_build_parent_loc (&local->transaction.parent_loc, loc, - &op_errno); - if (ret) - goto out; - - local->transaction.main_frame = frame; - local->transaction.basename = AFR_BASENAME (loc->path); - int_lock = &local->internal_lock; - - int_lock->lockee_count = nlockee = 0; - ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local, - &local->transaction.parent_loc, - local->transaction.basename, - priv->child_count); - if (ret) - goto out; - - nlockee++; - ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local, - &local->loc, - NULL, - priv->child_count); - if (ret) - goto out; - - nlockee++; - qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee), - afr_entry_lockee_cmp); - int_lock->lockee_count = nlockee; - - ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); - if (ret < 
0) { - op_errno = -ret; - goto out; - } - - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; + int nlockee = 0; + + priv = this->private; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; + + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; + + loc_copy(&local->loc, loc); + local->inode = inode_ref(loc->inode); + local->parent = inode_ref(loc->parent); + + local->cont.rmdir.flags = flags; + + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); + + if (!local->xdata_req) + goto out; + + local->op = GF_FOP_RMDIR; + local->transaction.wind = afr_rmdir_wind; + local->transaction.unwind = afr_rmdir_unwind; + + ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno); + if (ret) + goto out; + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME(loc->path); + int_lock = &local->internal_lock; + + int_lock->lockee_count = nlockee = 0; + ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local, + &local->transaction.parent_loc, + local->transaction.basename, priv->child_count); + if (ret) + goto out; + + nlockee++; + ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local, &local->loc, + NULL, priv->child_count); + if (ret) + goto out; + + nlockee++; + qsort(int_lock->lockee, nlockee, sizeof(*int_lock->lockee), + afr_entry_lockee_cmp); + int_lock->lockee_count = nlockee; + + ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } /* }}} */ diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index d0b07e9064f..113e39acfe8 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. 
*/ - #include #include #include @@ -44,146 +43,145 @@ * */ int -afr_handle_quota_size (call_frame_t *frame, xlator_t *this) +afr_handle_quota_size(call_frame_t *frame, xlator_t *this) { - unsigned char *readable = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - struct afr_reply *replies = NULL; - int i = 0; - int ret = 0; - quota_meta_t size = {0, }; - quota_meta_t max_size = {0, }; - int readable_cnt = 0; - int read_subvol = -1; - - local = frame->local; - priv = this->private; - replies = local->replies; - - readable = alloca0 (priv->child_count); - - afr_inode_read_subvol_get (local->inode, this, readable, 0, 0); - - readable_cnt = AFR_COUNT (readable, priv->child_count); - - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) - continue; - if (readable_cnt && !readable[i]) - continue; - if (!replies[i].xdata) - continue; - ret = quota_dict_get_meta (replies[i].xdata, QUOTA_SIZE_KEY, - &size); - if (ret == -1) - continue; - if (read_subvol == -1) - read_subvol = i; - if (size.size > max_size.size || - (size.file_count + size.dir_count) > - (max_size.file_count + max_size.dir_count)) - read_subvol = i; - - if (size.size > max_size.size) - max_size.size = size.size; - if (size.file_count > max_size.file_count) - max_size.file_count = size.file_count; - if (size.dir_count > max_size.dir_count) - max_size.dir_count = size.dir_count; - } - - if (max_size.size == 0 && max_size.file_count == 0 && - max_size.dir_count == 0) - return read_subvol; - - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) - continue; - if (readable_cnt && !readable[i]) - continue; - if (!replies[i].xdata) - continue; - quota_dict_set_meta (replies[i].xdata, QUOTA_SIZE_KEY, - &max_size, IA_IFDIR); - } - + unsigned char *readable = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + struct afr_reply *replies = NULL; + int i = 0; + int ret = 0; + quota_meta_t size = { + 0, + }; + quota_meta_t max_size = { + 0, + }; + int readable_cnt = 0; + int read_subvol = -1; + + local = frame->local; + priv = this->private; + replies = local->replies; + + readable = alloca0(priv->child_count); + + afr_inode_read_subvol_get(local->inode, this, readable, 0, 0); + + readable_cnt = AFR_COUNT(readable, priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (readable_cnt && !readable[i]) + continue; + if (!replies[i].xdata) + continue; + ret = quota_dict_get_meta(replies[i].xdata, QUOTA_SIZE_KEY, &size); + if (ret == -1) + continue; + if (read_subvol == -1) + read_subvol = i; + if (size.size > max_size.size || + (size.file_count + size.dir_count) > + (max_size.file_count + max_size.dir_count)) + read_subvol = i; + + if (size.size > max_size.size) + max_size.size = size.size; + if (size.file_count > max_size.file_count) + max_size.file_count = size.file_count; + if (size.dir_count > max_size.dir_count) + max_size.dir_count = size.dir_count; + } + + if (max_size.size == 0 && max_size.file_count == 0 && + max_size.dir_count == 0) return read_subvol; -} + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (readable_cnt && !readable[i]) + continue; + if (!replies[i].xdata) + continue; + quota_dict_set_meta(replies[i].xdata, QUOTA_SIZE_KEY, &max_size, + IA_IFDIR); + } + + return read_subvol; +} /* {{{ access */ int -afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int 
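/*
 * Illustration only -- not part of this change and not AFR code.
 * afr_handle_quota_size() above walks every readable reply, keeps a running
 * component-wise maximum of the quota usage (size, file_count, dir_count),
 * remembers which subvolume reported the largest values, and finally writes
 * that maximum back into each reply.  The standalone sketch below performs
 * the same "pick the reply with the biggest usage" reduction over an
 * invented usage_t array.
 */
#include <stdint.h>
#include <stdio.h>

typedef struct {
    int valid;
    uint64_t size;
    uint64_t file_count;
    uint64_t dir_count;
} usage_t;

/* Return the index of the reply whose usage should win, or -1 if none,
 * and report the component-wise maximum through max_out. */
static int
pick_biggest(const usage_t *replies, int count, usage_t *max_out)
{
    usage_t max = {0};
    int winner = -1;
    int i;

    for (i = 0; i < count; i++) {
        if (!replies[i].valid)
            continue;
        if (winner == -1)
            winner = i;
        if (replies[i].size > max.size ||
            replies[i].file_count + replies[i].dir_count >
                max.file_count + max.dir_count)
            winner = i;

        if (replies[i].size > max.size)
            max.size = replies[i].size;
        if (replies[i].file_count > max.file_count)
            max.file_count = replies[i].file_count;
        if (replies[i].dir_count > max.dir_count)
            max.dir_count = replies[i].dir_count;
    }

    *max_out = max;
    return winner;
}

int
main(void)
{
    usage_t replies[] = {
        {1, 4096, 10, 2},
        {1, 8192, 9, 2},
        {0, 0, 0, 0}, /* e.g. a brick that did not answer */
    };
    usage_t max;
    int winner = pick_biggest(replies, 3, &max);

    printf("read from reply %d, max size %llu\n", winner,
           (unsigned long long)max.size);
    return 0;
}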
op_ret, int op_errno, dict_t *xdata) +afr_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata); + AFR_STACK_UNWIND(access, frame, op_ret, op_errno, xdata); - return 0; + return 0; } - int -afr_access_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_access_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - priv = this->private; - local = frame->local; - - if (subvol == -1) { - AFR_STACK_UNWIND (access, frame, local->op_ret, - local->op_errno, 0); - return 0; - } - - STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->access, - &local->loc, local->cont.access.mask, - local->xdata_req); - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + if (subvol == -1) { + AFR_STACK_UNWIND(access, frame, local->op_ret, local->op_errno, 0); + return 0; + } + + STACK_WIND_COOKIE(frame, afr_access_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->access, &local->loc, + local->cont.access.mask, local->xdata_req); + return 0; } int -afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, - int mask, dict_t *xdata) +afr_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int mask, + dict_t *xdata) { - afr_local_t *local = NULL; - int op_errno = 0; + afr_local_t *local = NULL; + int op_errno = 0; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_ACCESS; - loc_copy (&local->loc, loc); - local->cont.access.mask = mask; - if (xdata) - local->xdata_req = dict_ref (xdata); + local->op = GF_FOP_ACCESS; + loc_copy(&local->loc, loc); + local->cont.access.mask = mask; + if (xdata) + local->xdata_req = dict_ref(xdata); - afr_read_txn (frame, this, loc->inode, afr_access_wind, - AFR_METADATA_TRANSACTION); + afr_read_txn(frame, this, loc->inode, afr_access_wind, + AFR_METADATA_TRANSACTION); - return 0; + return 0; out: - AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL); + AFR_STACK_UNWIND(access, frame, -1, op_errno, NULL); - return 0; + return 0; } /* }}} */ @@ -191,152 +189,139 @@ out: /* {{{ stat */ int -afr_stat_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *buf, dict_t *xdata) +afr_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata); + AFR_STACK_UNWIND(stat, 
frame, op_ret, op_errno, buf, xdata); - return 0; + return 0; } - int -afr_stat_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_stat_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - priv = this->private; - local = frame->local; - - if (subvol == -1) { - AFR_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno, - 0, 0); - return 0; - } - - STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->stat, - &local->loc, local->xdata_req); - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + if (subvol == -1) { + AFR_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, 0, 0); + return 0; + } + + STACK_WIND_COOKIE( + frame, afr_stat_cbk, (void *)(long)subvol, priv->children[subvol], + priv->children[subvol]->fops->stat, &local->loc, local->xdata_req); + return 0; } int -afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +afr_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - afr_local_t *local = NULL; - int op_errno = 0; + afr_local_t *local = NULL; + int op_errno = 0; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_STAT; - loc_copy (&local->loc, loc); - if (xdata) - local->xdata_req = dict_ref (xdata); + local->op = GF_FOP_STAT; + loc_copy(&local->loc, loc); + if (xdata) + local->xdata_req = dict_ref(xdata); - afr_read_txn (frame, this, loc->inode, afr_stat_wind, - AFR_DATA_TRANSACTION); + afr_read_txn(frame, this, loc->inode, afr_stat_wind, AFR_DATA_TRANSACTION); - return 0; + return 0; out: - AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); + AFR_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - /* }}} */ /* {{{ fstat */ int -afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +afr_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata); + AFR_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); - return 0; + return 0; } - int -afr_fstat_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_fstat_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - priv = this->private; - local = frame->local; - - if (subvol == -1) { - AFR_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno, - 0, 0); - return 0; - } - - STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->fstat, - local->fd, local->xdata_req); - return 0; -} + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + if (subvol == -1) { + AFR_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, 0, 0); + 
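/*
 * Illustration only -- not part of this change and not AFR code.
 * The access/stat/fstat wind+callback pairs above implement AFR's read
 * transaction: on failure the callback records op_errno and calls
 * afr_read_txn_continue() to retry on another readable subvolume, and when
 * the wind function is handed subvol == -1 every candidate has been tried
 * and the stored error is unwound.  The synchronous sketch below compresses
 * that callback-driven state machine into a plain loop; read_from() and
 * NREPLICAS are invented.
 */
#include <errno.h>
#include <stdio.h>

#define NREPLICAS 3

/* Stand-in for winding a read fop to one replica: succeeds only on the
 * last one so the retry path is exercised. */
static int
read_from(int replica)
{
    return (replica == NREPLICAS - 1) ? 0 : -ENOTCONN;
}

/* Try each replica in turn, remembering the last failure the way
 * local->op_errno is kept, and give up only when the candidates run out
 * (the subvol == -1 case above). */
static int
read_with_failover(void)
{
    int saved_errno = EIO;
    int replica;

    for (replica = 0; replica < NREPLICAS; replica++) {
        int ret = read_from(replica);

        if (ret == 0) {
            printf("served read from replica %d\n", replica);
            return 0;
        }
        saved_errno = -ret;
    }
    return -saved_errno;
}

int
main(void)
{
    return read_with_failover() ? 1 : 0;
}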
return 0; + } + STACK_WIND_COOKIE( + frame, afr_fstat_cbk, (void *)(long)subvol, priv->children[subvol], + priv->children[subvol]->fops->fstat, local->fd, local->xdata_req); + return 0; +} int32_t -afr_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) +afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - afr_local_t *local = NULL; - int op_errno = 0; + afr_local_t *local = NULL; + int op_errno = 0; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_FSTAT; - local->fd = fd_ref (fd); - if (xdata) - local->xdata_req = dict_ref (xdata); + local->op = GF_FOP_FSTAT; + local->fd = fd_ref(fd); + if (xdata) + local->xdata_req = dict_ref(xdata); - afr_fix_open (fd, this); + afr_fix_open(fd, this); - afr_read_txn (frame, this, fd->inode, afr_fstat_wind, - AFR_DATA_TRANSACTION); + afr_read_txn(frame, this, fd->inode, afr_fstat_wind, AFR_DATA_TRANSACTION); - return 0; + return 0; out: - AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); + AFR_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } /* }}} */ @@ -344,1565 +329,1484 @@ out: /* {{{ readlink */ int -afr_readlink_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - const char *buf, struct iatt *sbuf, dict_t *xdata) +afr_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *buf, + struct iatt *sbuf, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = op_errno; + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, - buf, sbuf, xdata); - return 0; + AFR_STACK_UNWIND(readlink, frame, op_ret, op_errno, buf, sbuf, xdata); + return 0; } int -afr_readlink_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_readlink_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; - - if (subvol == -1) { - AFR_STACK_UNWIND (readlink, frame, local->op_ret, - local->op_errno, 0, 0, 0); - return 0; - } - - STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->readlink, - &local->loc, local->cont.readlink.size, - local->xdata_req); - return 0; -} + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + local = frame->local; + priv = this->private; + + if (subvol == -1) { + AFR_STACK_UNWIND(readlink, frame, local->op_ret, local->op_errno, 0, 0, + 0); + return 0; + } + + STACK_WIND_COOKIE(frame, afr_readlink_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->readlink, &local->loc, + local->cont.readlink.size, local->xdata_req); + return 0; +} int -afr_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size, dict_t *xdata) +afr_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { - afr_local_t * local = NULL; - int32_t op_errno = 0; + afr_local_t *local = NULL; + int32_t op_errno = 0; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = 
AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_READLINK; - loc_copy (&local->loc, loc); - local->cont.readlink.size = size; - if (xdata) - local->xdata_req = dict_ref (xdata); + local->op = GF_FOP_READLINK; + loc_copy(&local->loc, loc); + local->cont.readlink.size = size; + if (xdata) + local->xdata_req = dict_ref(xdata); - afr_read_txn (frame, this, loc->inode, afr_readlink_wind, - AFR_DATA_TRANSACTION); + afr_read_txn(frame, this, loc->inode, afr_readlink_wind, + AFR_DATA_TRANSACTION); - return 0; + return 0; out: - AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0); + AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0); - return 0; + return 0; } - /* }}} */ /* {{{ getxattr */ struct _xattr_key { - char *key; - struct list_head list; + char *key; + struct list_head list; }; - int -__gather_xattr_keys (dict_t *dict, char *key, data_t *value, - void *data) +__gather_xattr_keys(dict_t *dict, char *key, data_t *value, void *data) { - struct list_head * list = data; - struct _xattr_key * xkey = NULL; - - if (!strncmp (key, AFR_XATTR_PREFIX, - SLEN (AFR_XATTR_PREFIX))) { + struct list_head *list = data; + struct _xattr_key *xkey = NULL; - xkey = GF_MALLOC (sizeof (*xkey), gf_afr_mt_xattr_key); - if (!xkey) - return -1; + if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) { + xkey = GF_MALLOC(sizeof(*xkey), gf_afr_mt_xattr_key); + if (!xkey) + return -1; - xkey->key = key; - INIT_LIST_HEAD (&xkey->list); + xkey->key = key; + INIT_LIST_HEAD(&xkey->list); - list_add_tail (&xkey->list, list); - } - return 0; + list_add_tail(&xkey->list, list); + } + return 0; } - void -afr_filter_xattrs (dict_t *dict) +afr_filter_xattrs(dict_t *dict) { - struct list_head keys = {0,}; - struct _xattr_key *key = NULL; - struct _xattr_key *tmp = NULL; + struct list_head keys = { + 0, + }; + struct _xattr_key *key = NULL; + struct _xattr_key *tmp = NULL; - INIT_LIST_HEAD (&keys); + INIT_LIST_HEAD(&keys); - dict_foreach (dict, __gather_xattr_keys, - (void *) &keys); + dict_foreach(dict, __gather_xattr_keys, (void *)&keys); - list_for_each_entry_safe (key, tmp, &keys, list) { - dict_del (dict, key->key); + list_for_each_entry_safe(key, tmp, &keys, list) + { + dict_del(dict, key->key); - list_del_init (&key->list); + list_del_init(&key->list); - GF_FREE (key); - } + GF_FREE(key); + } } -static -gf_boolean_t -afr_getxattr_ignorable_errnos (int32_t op_errno) +static gf_boolean_t +afr_getxattr_ignorable_errnos(int32_t op_errno) { - if (op_errno == ENODATA || op_errno == ENOTSUP || op_errno == ERANGE || - op_errno == ENAMETOOLONG) - return _gf_true; + if (op_errno == ENODATA || op_errno == ENOTSUP || op_errno == ERANGE || + op_errno == ENAMETOOLONG) + return _gf_true; - return _gf_false; + return _gf_false; } int -afr_getxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) { - local->op_ret = op_ret; - local->op_errno = op_errno; + if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) { + local->op_ret = op_ret; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - if (dict) - 
afr_filter_xattrs (dict); + if (dict) + afr_filter_xattrs(dict); - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); + AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + return 0; } - int -afr_getxattr_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_getxattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; - - if (subvol == -1) { - AFR_STACK_UNWIND (getxattr, frame, local->op_ret, - local->op_errno, NULL, NULL); - return 0; - } - - STACK_WIND_COOKIE (frame, afr_getxattr_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->getxattr, - &local->loc, local->cont.getxattr.name, - local->xdata_req); - return 0; -} + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + local = frame->local; + priv = this->private; + + if (subvol == -1) { + AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, NULL, + NULL); + return 0; + } + + STACK_WIND_COOKIE(frame, afr_getxattr_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->getxattr, &local->loc, + local->cont.getxattr.name, local->xdata_req); + return 0; +} int32_t -afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict, + dict_t *xdata) { - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } int32_t -afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - xlator_t **children = NULL; - dict_t *xattr = NULL; - char *tmp_report = NULL; - char lk_summary[1024] = {0,}; - int serz_len = 0; - int32_t callcnt = 0; - long int cky = 0; - int ret = 0; - - priv = this->private; - children = priv->children; - - local = frame->local; - cky = (long) cookie; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - if (op_ret == -1) - local->replies[cky].op_errno = op_errno; - - if (!local->dict) - local->dict = dict_new (); - if (local->dict) { - ret = dict_get_str (dict, local->cont.getxattr.name, - &tmp_report); - if (ret) - goto unlock; - ret = dict_set_dynstr (local->dict, - children[cky]->name, - gf_strdup (tmp_report)); - if (ret) - goto unlock; - } + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + xlator_t **children = NULL; + dict_t *xattr = NULL; + char *tmp_report = NULL; + char lk_summary[1024] = { + 0, + }; + int serz_len = 0; + int32_t callcnt = 0; + long int cky = 0; + int ret = 0; + + priv = this->private; + children = priv->children; + + local = frame->local; + cky = (long)cookie; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) + local->replies[cky].op_errno = op_errno; + + if (!local->dict) + local->dict = dict_new(); + if (local->dict) { + ret = dict_get_str(dict, local->cont.getxattr.name, &tmp_report); + if (ret) + goto unlock; + ret = dict_set_dynstr(local->dict, children[cky]->name, + gf_strdup(tmp_report)); + if (ret) + goto unlock; } + } unlock: - UNLOCK (&frame->lock); - - if (!callcnt) { - xattr = dict_new (); - if 
(!xattr) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - ret = dict_serialize_value_with_delim (local->dict, - lk_summary, - &serz_len, '\n'); - if (ret) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - if (serz_len == -1) - snprintf (lk_summary, sizeof (lk_summary), - "No locks cleared."); - ret = dict_set_dynstr (xattr, local->cont.getxattr.name, - gf_strdup (lk_summary)); - if (ret) { - op_ret = -1; - op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_ERROR, - ENOMEM, AFR_MSG_DICT_SET_FAILED, - "Error setting dictionary"); - goto unwind; - } + UNLOCK(&frame->lock); + + if (!callcnt) { + xattr = dict_new(); + if (!xattr) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + ret = dict_serialize_value_with_delim(local->dict, lk_summary, + &serz_len, '\n'); + if (ret) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + if (serz_len == -1) + snprintf(lk_summary, sizeof(lk_summary), "No locks cleared."); + ret = dict_set_dynstr(xattr, local->cont.getxattr.name, + gf_strdup(lk_summary)); + if (ret) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED, + "Error setting dictionary"); + goto unwind; + } - op_errno = afr_final_errno (local, priv); + op_errno = afr_final_errno(local, priv); -unwind: - AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr, - xdata); - if (xattr) - dict_unref (xattr); - } + unwind: + AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata); + if (xattr) + dict_unref(xattr); + } - return ret; + return ret; } int32_t -afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - xlator_t **children = NULL; - dict_t *xattr = NULL; - char *tmp_report = NULL; - char lk_summary[1024] = {0,}; - int serz_len = 0; - int32_t callcnt = 0; - long int cky = 0; - int ret = 0; - - priv = this->private; - children = priv->children; - - local = frame->local; - cky = (long) cookie; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - if (op_ret == -1) - local->replies[cky].op_errno = op_errno; - - if (!local->dict) - local->dict = dict_new (); - if (local->dict) { - ret = dict_get_str (dict, local->cont.getxattr.name, - &tmp_report); - if (ret) - goto unlock; - ret = dict_set_dynstr (local->dict, - children[cky]->name, - gf_strdup (tmp_report)); - if (ret) - goto unlock; - } + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + xlator_t **children = NULL; + dict_t *xattr = NULL; + char *tmp_report = NULL; + char lk_summary[1024] = { + 0, + }; + int serz_len = 0; + int32_t callcnt = 0; + long int cky = 0; + int ret = 0; + + priv = this->private; + children = priv->children; + + local = frame->local; + cky = (long)cookie; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) + local->replies[cky].op_errno = op_errno; + + if (!local->dict) + local->dict = dict_new(); + if (local->dict) { + ret = dict_get_str(dict, local->cont.getxattr.name, &tmp_report); + if (ret) + goto unlock; + ret = dict_set_dynstr(local->dict, children[cky]->name, + gf_strdup(tmp_report)); + if (ret) + goto unlock; } + } unlock: - UNLOCK (&frame->lock); - - if (!callcnt) { - xattr = dict_new (); - if (!xattr) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - ret = 
dict_serialize_value_with_delim (local->dict, - lk_summary, - &serz_len, '\n'); - if (ret) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - if (serz_len == -1) - snprintf (lk_summary, sizeof (lk_summary), - "No locks cleared."); - ret = dict_set_dynstr (xattr, local->cont.getxattr.name, - gf_strdup (lk_summary)); - if (ret) { - op_ret = -1; - op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_ERROR, - ENOMEM, AFR_MSG_DICT_SET_FAILED, - "Error setting dictionary"); - goto unwind; - } + UNLOCK(&frame->lock); + + if (!callcnt) { + xattr = dict_new(); + if (!xattr) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + ret = dict_serialize_value_with_delim(local->dict, lk_summary, + &serz_len, '\n'); + if (ret) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + if (serz_len == -1) + snprintf(lk_summary, sizeof(lk_summary), "No locks cleared."); + ret = dict_set_dynstr(xattr, local->cont.getxattr.name, + gf_strdup(lk_summary)); + if (ret) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED, + "Error setting dictionary"); + goto unwind; + } - op_errno = afr_final_errno (local, priv); + op_errno = afr_final_errno(local, priv); -unwind: - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata); + unwind: + AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); - if (xattr) - dict_unref (xattr); - } + if (xattr) + dict_unref(xattr); + } - return ret; + return ret; } /** * node-uuid cbk uses next child querying mechanism */ int32_t -afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_node_uuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - xlator_t **children = NULL; - int unwind = 1; - int curr_call_child = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + xlator_t **children = NULL; + int unwind = 1; + int curr_call_child = 0; - priv = this->private; - children = priv->children; + priv = this->private; + children = priv->children; - local = frame->local; + local = frame->local; - if (op_ret == -1) { /** query the _next_ child */ - - /** - * _current_ becomes _next_ - * If done with all children and yet no success; give up ! - */ - curr_call_child = (int) ((long)cookie); - if (++curr_call_child == priv->child_count) - goto unwind; - - gf_msg_debug (this->name, op_errno, - "op_ret (-1): Re-querying afr-child (%d/%d)", - curr_call_child, priv->child_count); - - unwind = 0; - STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk, - (void *) (long) curr_call_child, - children[curr_call_child], - children[curr_call_child]->fops->getxattr, - &local->loc, - local->cont.getxattr.name, - local->xdata_req); - } + if (op_ret == -1) { /** query the _next_ child */ - unwind: - if (unwind) - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, - xdata); + /** + * _current_ becomes _next_ + * If done with all children and yet no success; give up ! 
+ */ + curr_call_child = (int)((long)cookie); + if (++curr_call_child == priv->child_count) + goto unwind; + + gf_msg_debug(this->name, op_errno, + "op_ret (-1): Re-querying afr-child (%d/%d)", + curr_call_child, priv->child_count); + + unwind = 0; + STACK_WIND_COOKIE( + frame, afr_getxattr_node_uuid_cbk, (void *)(long)curr_call_child, + children[curr_call_child], + children[curr_call_child]->fops->getxattr, &local->loc, + local->cont.getxattr.name, local->xdata_req); + } - return 0; +unwind: + if (unwind) + AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + + return 0; } /** * list-node-uuids cbk returns the list of node_uuids for the subvolume. */ int32_t -afr_getxattr_list_node_uuids_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict, dict_t *xdata) +afr_getxattr_list_node_uuids_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *dict, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int32_t callcnt = 0; - int ret = 0; - char *xattr_serz = NULL; - long cky = 0; - int32_t tlen = 0; - - local = frame->local; - priv = this->private; - cky = (long) cookie; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - local->replies[cky].valid = 1; - local->replies[cky].op_ret = op_ret; - local->replies[cky].op_errno = op_errno; - - if (op_ret < 0) - goto unlock; - - local->op_ret = 0; - - if (!local->xdata_rsp && xdata) - local->xdata_rsp = dict_ref (xdata); - local->replies[cky].xattr = dict_ref (dict); - } + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int32_t callcnt = 0; + int ret = 0; + char *xattr_serz = NULL; + long cky = 0; + int32_t tlen = 0; + + local = frame->local; + priv = this->private; + cky = (long)cookie; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + local->replies[cky].valid = 1; + local->replies[cky].op_ret = op_ret; + local->replies[cky].op_errno = op_errno; + + if (op_ret < 0) + goto unlock; + + local->op_ret = 0; + + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref(xdata); + local->replies[cky].xattr = dict_ref(dict); + } unlock: - UNLOCK (&frame->lock); - - if (!callcnt) { + UNLOCK(&frame->lock); - if (local->op_ret != 0) { - /* All bricks gave an error. */ - local->op_errno = afr_final_errno (local, priv); - goto unwind; - } + if (!callcnt) { + if (local->op_ret != 0) { + /* All bricks gave an error. 
*/ + local->op_errno = afr_final_errno(local, priv); + goto unwind; + } - /*Since we store the UUID0_STR as node uuid for down bricks and - *for non zero op_ret, assigning length to priv->child_count - *number of uuids*/ - local->cont.getxattr.xattr_len = (SLEN (UUID0_STR) + 2) * - priv->child_count; - - if (!local->dict) - local->dict = dict_new (); - if (!local->dict) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } + /*Since we store the UUID0_STR as node uuid for down bricks and + *for non zero op_ret, assigning length to priv->child_count + *number of uuids*/ + local->cont.getxattr.xattr_len = (SLEN(UUID0_STR) + 2) * + priv->child_count; + + if (!local->dict) + local->dict = dict_new(); + if (!local->dict) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } - xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len, - sizeof (char), gf_common_mt_char); + xattr_serz = GF_CALLOC(local->cont.getxattr.xattr_len, sizeof(char), + gf_common_mt_char); - if (!xattr_serz) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } - - ret = afr_serialize_xattrs_with_delimiter (frame, this, - xattr_serz, - UUID0_STR, &tlen, - ' '); - if (ret) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } - ret = dict_set_dynstr (local->dict, - GF_XATTR_LIST_NODE_UUIDS_KEY, - xattr_serz); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Cannot set node_uuid key in dict"); - local->op_ret = -1; - local->op_errno = ENOMEM; - } else { - local->op_ret = local->cont.getxattr.xattr_len - 1; - local->op_errno = 0; - } + if (!xattr_serz) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } -unwind: - AFR_STACK_UNWIND (getxattr, frame, local->op_ret, - local->op_errno, local->dict, - local->xdata_rsp); + ret = afr_serialize_xattrs_with_delimiter(frame, this, xattr_serz, + UUID0_STR, &tlen, ' '); + if (ret) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } + ret = dict_set_dynstr(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY, + xattr_serz); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Cannot set node_uuid key in dict"); + local->op_ret = -1; + local->op_errno = ENOMEM; + } else { + local->op_ret = local->cont.getxattr.xattr_len - 1; + local->op_errno = 0; } - return ret; -} + unwind: + AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, + local->dict, local->xdata_rsp); + } + return ret; +} int32_t -afr_getxattr_quota_size_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - int idx = (long) cookie; - int call_count = 0; - afr_local_t *local = frame->local; - int read_subvol = -1; - - local->replies[idx].valid = 1; - local->replies[idx].op_ret = op_ret; - local->replies[idx].op_errno = op_errno; - if (dict) - local->replies[idx].xdata = dict_ref (dict); - call_count = afr_frame_return (frame); - if (call_count == 0) { - local->inode = inode_ref (local->loc.inode); - read_subvol = afr_handle_quota_size (frame, this); - if (read_subvol != -1) { - op_ret = local->replies[read_subvol].op_ret; - op_errno = local->replies[read_subvol].op_errno; - dict = local->replies[read_subvol].xdata; - } - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, - xdata); + int idx = (long)cookie; + int call_count = 0; + 
afr_local_t *local = frame->local; + int read_subvol = -1; + + local->replies[idx].valid = 1; + local->replies[idx].op_ret = op_ret; + local->replies[idx].op_errno = op_errno; + if (dict) + local->replies[idx].xdata = dict_ref(dict); + call_count = afr_frame_return(frame); + if (call_count == 0) { + local->inode = inode_ref(local->loc.inode); + read_subvol = afr_handle_quota_size(frame, this); + if (read_subvol != -1) { + op_ret = local->replies[read_subvol].op_ret; + op_errno = local->replies[read_subvol].op_errno; + dict = local->replies[read_subvol].xdata; } + AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + } - return 0; + return 0; } int32_t -afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - int call_cnt = 0, len = 0; - char *lockinfo_buf = NULL; - dict_t *lockinfo = NULL, *newdict = NULL; - afr_local_t *local = NULL; + int call_cnt = 0, len = 0; + char *lockinfo_buf = NULL; + dict_t *lockinfo = NULL, *newdict = NULL; + afr_local_t *local = NULL; - LOCK (&frame->lock); - { - local = frame->local; - - call_cnt = --local->call_count; + LOCK(&frame->lock); + { + local = frame->local; - if ((op_ret < 0) || (!dict && !xdata)) { - goto unlock; - } + call_cnt = --local->call_count; - if (xdata) { - if (!local->xdata_rsp) { - local->xdata_rsp = dict_new (); - if (!local->xdata_rsp) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unlock; - } - } - } + if ((op_ret < 0) || (!dict && !xdata)) { + goto unlock; + } - if (!dict) { - goto unlock; + if (xdata) { + if (!local->xdata_rsp) { + local->xdata_rsp = dict_new(); + if (!local->xdata_rsp) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unlock; } + } + } - op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY, - (void **)&lockinfo_buf, &len); + if (!dict) { + goto unlock; + } - if (!lockinfo_buf) { - goto unlock; - } + op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, + (void **)&lockinfo_buf, &len); - if (!local->dict) { - local->dict = dict_new (); - if (!local->dict) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unlock; - } - } - } -unlock: - UNLOCK (&frame->lock); - - if (lockinfo_buf != NULL) { - lockinfo = dict_new (); - if (lockinfo == NULL) { - local->op_ret = -1; - local->op_errno = ENOMEM; - } else { - op_ret = dict_unserialize (lockinfo_buf, len, - &lockinfo); - - if (lockinfo && local->dict) { - dict_copy (lockinfo, local->dict); - } - } + if (!lockinfo_buf) { + goto unlock; } - if (xdata && local->xdata_rsp) { - dict_copy (xdata, local->xdata_rsp); + if (!local->dict) { + local->dict = dict_new(); + if (!local->dict) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unlock; + } } + } +unlock: + UNLOCK(&frame->lock); - if (!call_cnt) { - newdict = dict_new (); - if (!newdict) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } + if (lockinfo_buf != NULL) { + lockinfo = dict_new(); + if (lockinfo == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + } else { + op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); - len = dict_serialized_length (local->dict); - if (len <= 0) { - goto unwind; - } + if (lockinfo && local->dict) { + dict_copy(lockinfo, local->dict); + } + } + } + + if (xdata && local->xdata_rsp) { + dict_copy(xdata, local->xdata_rsp); + } + + if (!call_cnt) { + newdict = 
dict_new(); + if (!newdict) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } - lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char); - if (!lockinfo_buf) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } + len = dict_serialized_length(local->dict); + if (len <= 0) { + goto unwind; + } - op_ret = dict_serialize (local->dict, lockinfo_buf); - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = -op_ret; - } + lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); + if (!lockinfo_buf) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } - op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY, - (void *)lockinfo_buf, len); - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = -op_ret; - goto unwind; - } + op_ret = dict_serialize(local->dict, lockinfo_buf); + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = -op_ret; + } - unwind: - AFR_STACK_UNWIND (getxattr, frame, op_ret, - op_errno, newdict, - local->xdata_rsp); + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = -op_ret; + goto unwind; } - dict_unref (lockinfo); + unwind: + AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict, + local->xdata_rsp); + } - return 0; + dict_unref(lockinfo); + + return 0; } int32_t -afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - int call_cnt = 0, len = 0; - char *lockinfo_buf = NULL; - dict_t *lockinfo = NULL, *newdict = NULL; - afr_local_t *local = NULL; + int call_cnt = 0, len = 0; + char *lockinfo_buf = NULL; + dict_t *lockinfo = NULL, *newdict = NULL; + afr_local_t *local = NULL; - LOCK (&frame->lock); - { - local = frame->local; - - call_cnt = --local->call_count; + LOCK(&frame->lock); + { + local = frame->local; - if ((op_ret < 0) || (!dict && !xdata)) { - goto unlock; - } + call_cnt = --local->call_count; - if (xdata) { - if (!local->xdata_rsp) { - local->xdata_rsp = dict_new (); - if (!local->xdata_rsp) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unlock; - } - } - } + if ((op_ret < 0) || (!dict && !xdata)) { + goto unlock; + } - if (!dict) { - goto unlock; + if (xdata) { + if (!local->xdata_rsp) { + local->xdata_rsp = dict_new(); + if (!local->xdata_rsp) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unlock; } + } + } - op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY, - (void **)&lockinfo_buf, &len); + if (!dict) { + goto unlock; + } - if (!lockinfo_buf) { - goto unlock; - } + op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, + (void **)&lockinfo_buf, &len); - if (!local->dict) { - local->dict = dict_new (); - if (!local->dict) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unlock; - } - } - } -unlock: - UNLOCK (&frame->lock); - - if (lockinfo_buf != NULL) { - lockinfo = dict_new (); - if (lockinfo == NULL) { - local->op_ret = -1; - local->op_errno = ENOMEM; - } else { - op_ret = dict_unserialize (lockinfo_buf, len, - &lockinfo); - - if (lockinfo && local->dict) { - dict_copy (lockinfo, local->dict); - } - } + if (!lockinfo_buf) { + goto unlock; } - if (xdata && local->xdata_rsp) { - dict_copy (xdata, local->xdata_rsp); + if (!local->dict) { + local->dict = dict_new(); + if (!local->dict) { + local->op_ret = -1; 
+ local->op_errno = ENOMEM; + goto unlock; + } } + } +unlock: + UNLOCK(&frame->lock); - if (!call_cnt) { - newdict = dict_new (); - if (!newdict) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } + if (lockinfo_buf != NULL) { + lockinfo = dict_new(); + if (lockinfo == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + } else { + op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); - len = dict_serialized_length (local->dict); - if (len <= 0) { - goto unwind; - } + if (lockinfo && local->dict) { + dict_copy(lockinfo, local->dict); + } + } + } + + if (xdata && local->xdata_rsp) { + dict_copy(xdata, local->xdata_rsp); + } + + if (!call_cnt) { + newdict = dict_new(); + if (!newdict) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } - lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char); - if (!lockinfo_buf) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } + len = dict_serialized_length(local->dict); + if (len <= 0) { + goto unwind; + } - op_ret = dict_serialize (local->dict, lockinfo_buf); - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = -op_ret; - } + lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); + if (!lockinfo_buf) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } - op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY, - (void *)lockinfo_buf, len); - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = -op_ret; - goto unwind; - } + op_ret = dict_serialize(local->dict, lockinfo_buf); + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = -op_ret; + } - unwind: - AFR_STACK_UNWIND (fgetxattr, frame, op_ret, - op_errno, newdict, - local->xdata_rsp); + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = -op_ret; + goto unwind; } - dict_unref (lockinfo); + unwind: + AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict, + local->xdata_rsp); + } - return 0; + dict_unref(lockinfo); + + return 0; } int32_t -afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - afr_local_t *local = NULL; - int32_t callcnt = 0; - int ret = 0; - char *xattr = NULL; - char *xattr_serz = NULL; - char xattr_cky[1024] = {0,}; - dict_t *nxattr = NULL; - long cky = 0; - int32_t padding = 0; - int32_t tlen = 0; - - if (!frame || !frame->local || !this) { - gf_msg ("", GF_LOG_ERROR, 0, - AFR_MSG_INVALID_ARG, "possible NULL deref"); - goto out; + afr_local_t *local = NULL; + int32_t callcnt = 0; + int ret = 0; + char *xattr = NULL; + char *xattr_serz = NULL; + char xattr_cky[1024] = { + 0, + }; + dict_t *nxattr = NULL; + long cky = 0; + int32_t padding = 0; + int32_t tlen = 0; + + if (!frame || !frame->local || !this) { + gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref"); + goto out; + } + + local = frame->local; + cky = (long)cookie; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret < 0) { + local->op_errno = op_errno; + } else { + local->op_ret = op_ret; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref(xdata); } - local = frame->local; - cky = (long) cookie; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret < 0) { - local->op_errno = op_errno; - } else { - 
local->op_ret = op_ret; - if (!local->xdata_rsp && xdata) - local->xdata_rsp = dict_ref (xdata); - } + if (!dict || (op_ret < 0)) + goto unlock; - if (!dict || (op_ret < 0)) - goto unlock; - - if (!local->dict) - local->dict = dict_new (); - - if (local->dict) { - ret = dict_get_str (dict, - local->cont.getxattr.name, - &xattr); - if (ret) - goto unlock; - - xattr = gf_strdup (xattr); - - (void)snprintf (xattr_cky, sizeof(xattr_cky), "%s-%ld", - local->cont.getxattr.name, cky); - ret = dict_set_dynstr (local->dict, - xattr_cky, xattr); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Cannot set xattr cookie key"); - goto unlock; - } - - local->cont.getxattr.xattr_len - += strlen (xattr) + 1; - } + if (!local->dict) + local->dict = dict_new(); + + if (local->dict) { + ret = dict_get_str(dict, local->cont.getxattr.name, &xattr); + if (ret) + goto unlock; + + xattr = gf_strdup(xattr); + + (void)snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld", + local->cont.getxattr.name, cky); + ret = dict_set_dynstr(local->dict, xattr_cky, xattr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Cannot set xattr cookie key"); + goto unlock; + } + + local->cont.getxattr.xattr_len += strlen(xattr) + 1; } + } unlock: - UNLOCK (&frame->lock); - - if (!callcnt) { - if (!local->cont.getxattr.xattr_len) - goto unwind; - - nxattr = dict_new (); - if (!nxattr) - goto unwind; - - /* extra bytes for decorations (brackets and <>'s) */ - padding += strlen (this->name) - + SLEN (AFR_PATHINFO_HEADER) + 4; - local->cont.getxattr.xattr_len += (padding + 2); - - xattr_serz = GF_MALLOC (local->cont.getxattr.xattr_len, - gf_common_mt_char); - - if (!xattr_serz) - goto unwind; - - /* the xlator info */ - int xattr_serz_len = sprintf (xattr_serz, - "(<"AFR_PATHINFO_HEADER"%s> ", - this->name); - - /* actual series of pathinfo */ - ret = dict_serialize_value_with_delim (local->dict, - xattr_serz - + xattr_serz_len, - &tlen, ' '); - if (ret) { - goto unwind; - } + UNLOCK(&frame->lock); + + if (!callcnt) { + if (!local->cont.getxattr.xattr_len) + goto unwind; - /* closing part */ - *(xattr_serz + padding + tlen) = ')'; - *(xattr_serz + padding + tlen + 1) = '\0'; + nxattr = dict_new(); + if (!nxattr) + goto unwind; - ret = dict_set_dynstr (nxattr, local->cont.getxattr.name, - xattr_serz); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Cannot set pathinfo key in dict"); + /* extra bytes for decorations (brackets and <>'s) */ + padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4; + local->cont.getxattr.xattr_len += (padding + 2); - unwind: - AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret, - local->op_errno, nxattr, local->xdata_rsp); + xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len, + gf_common_mt_char); - if (nxattr) - dict_unref (nxattr); + if (!xattr_serz) + goto unwind; + + /* the xlator info */ + int xattr_serz_len = sprintf( + xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name); + + /* actual series of pathinfo */ + ret = dict_serialize_value_with_delim( + local->dict, xattr_serz + xattr_serz_len, &tlen, ' '); + if (ret) { + goto unwind; } + /* closing part */ + *(xattr_serz + padding + tlen) = ')'; + *(xattr_serz + padding + tlen + 1) = '\0'; + + ret = dict_set_dynstr(nxattr, local->cont.getxattr.name, xattr_serz); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Cannot set pathinfo key in dict"); + + unwind: + AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, 
local->op_errno, + nxattr, local->xdata_rsp); + + if (nxattr) + dict_unref(nxattr); + } + out: - return ret; + return ret; } int32_t -afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - afr_local_t *local = NULL; - int32_t callcnt = 0; - int ret = 0; - char *xattr = NULL; - char *xattr_serz = NULL; - char xattr_cky[1024] = {0,}; - dict_t *nxattr = NULL; - long cky = 0; - int32_t padding = 0; - int32_t tlen = 0; - - if (!frame || !frame->local || !this) { - gf_msg ("", GF_LOG_ERROR, 0, - AFR_MSG_INVALID_ARG, "possible NULL deref"); - goto out; + afr_local_t *local = NULL; + int32_t callcnt = 0; + int ret = 0; + char *xattr = NULL; + char *xattr_serz = NULL; + char xattr_cky[1024] = { + 0, + }; + dict_t *nxattr = NULL; + long cky = 0; + int32_t padding = 0; + int32_t tlen = 0; + + if (!frame || !frame->local || !this) { + gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref"); + goto out; + } + + local = frame->local; + cky = (long)cookie; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret < 0) { + local->op_errno = op_errno; + } else { + local->op_ret = op_ret; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref(xdata); } - local = frame->local; - cky = (long) cookie; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret < 0) { - local->op_errno = op_errno; - } else { - local->op_ret = op_ret; - if (!local->xdata_rsp && xdata) - local->xdata_rsp = dict_ref (xdata); - } - - if (!dict || (op_ret < 0)) - goto unlock; - - if (!local->dict) - local->dict = dict_new (); - - if (local->dict) { - ret = dict_get_str (dict, - local->cont.getxattr.name, - &xattr); - if (ret) - goto unlock; - - xattr = gf_strdup (xattr); - - (void)snprintf (xattr_cky, 1024, "%s-%ld", - local->cont.getxattr.name, cky); - ret = dict_set_dynstr (local->dict, - xattr_cky, xattr); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, - AFR_MSG_DICT_SET_FAILED, - "Cannot set xattr " - "cookie key"); - goto unlock; - } - - local->cont.getxattr.xattr_len += strlen (xattr) + 1; - } - } -unlock: - UNLOCK (&frame->lock); + if (!dict || (op_ret < 0)) + goto unlock; + + if (!local->dict) + local->dict = dict_new(); - if (!callcnt) { - if (!local->cont.getxattr.xattr_len) - goto unwind; + if (local->dict) { + ret = dict_get_str(dict, local->cont.getxattr.name, &xattr); + if (ret) + goto unlock; - nxattr = dict_new (); - if (!nxattr) - goto unwind; + xattr = gf_strdup(xattr); - /* extra bytes for decorations (brackets and <>'s) */ - padding += strlen (this->name) + SLEN (AFR_PATHINFO_HEADER) + 4; - local->cont.getxattr.xattr_len += (padding + 2); + (void)snprintf(xattr_cky, 1024, "%s-%ld", local->cont.getxattr.name, + cky); + ret = dict_set_dynstr(local->dict, xattr_cky, xattr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Cannot set xattr " + "cookie key"); + goto unlock; + } - xattr_serz = GF_MALLOC (local->cont.getxattr.xattr_len, - gf_common_mt_char); + local->cont.getxattr.xattr_len += strlen(xattr) + 1; + } + } +unlock: + UNLOCK(&frame->lock); - if (!xattr_serz) - goto unwind; + if (!callcnt) { + if (!local->cont.getxattr.xattr_len) + goto unwind; - /* the xlator info */ - int xattr_serz_len = sprintf (xattr_serz, - "(<"AFR_PATHINFO_HEADER"%s> ", - this->name); + nxattr = 
dict_new(); + if (!nxattr) + goto unwind; - /* actual series of pathinfo */ - ret = dict_serialize_value_with_delim (local->dict, - xattr_serz + xattr_serz_len, - &tlen, ' '); - if (ret) { - goto unwind; - } + /* extra bytes for decorations (brackets and <>'s) */ + padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4; + local->cont.getxattr.xattr_len += (padding + 2); - /* closing part */ - *(xattr_serz + padding + tlen) = ')'; - *(xattr_serz + padding + tlen + 1) = '\0'; + xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len, + gf_common_mt_char); - ret = dict_set_dynstr (nxattr, local->cont.getxattr.name, - xattr_serz); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Cannot set pathinfo key in dict"); + if (!xattr_serz) + goto unwind; - unwind: - AFR_STACK_UNWIND (getxattr, frame, local->op_ret, - local->op_errno, nxattr, local->xdata_rsp); + /* the xlator info */ + int xattr_serz_len = sprintf( + xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name); - if (nxattr) - dict_unref (nxattr); + /* actual series of pathinfo */ + ret = dict_serialize_value_with_delim( + local->dict, xattr_serz + xattr_serz_len, &tlen, ' '); + if (ret) { + goto unwind; } + /* closing part */ + *(xattr_serz + padding + tlen) = ')'; + *(xattr_serz + padding + tlen + 1) = '\0'; + + ret = dict_set_dynstr(nxattr, local->cont.getxattr.name, xattr_serz); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Cannot set pathinfo key in dict"); + + unwind: + AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, + nxattr, local->xdata_rsp); + + if (nxattr) + dict_unref(nxattr); + } + out: - return ret; + return ret; } static int -afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data) +afr_aggregate_stime_xattr(dict_t *this, char *key, data_t *value, void *data) { - int ret = 0; + int ret = 0; - if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) - ret = gf_get_max_stime (THIS, data, key, value); + if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) + ret = gf_get_max_stime(THIS, data, key, value); - return ret; + return ret; } int32_t -afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_common_getxattr_stime_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - afr_local_t *local = NULL; - int32_t callcnt = 0; - - if (!frame || !frame->local || !this) { - gf_msg ("", GF_LOG_ERROR, 0, - AFR_MSG_INVALID_ARG, "possible NULL deref"); - goto out; - } + afr_local_t *local = NULL; + int32_t callcnt = 0; - local = frame->local; + if (!frame || !frame->local || !this) { + gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref"); + goto out; + } - LOCK (&frame->lock); - { - callcnt = --local->call_count; + local = frame->local; - if (!dict || (op_ret < 0)) { - local->op_errno = op_errno; - goto cleanup; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; - if (!local->dict) - local->dict = dict_copy_with_ref (dict, NULL); - else - dict_foreach (dict, afr_aggregate_stime_xattr, - local->dict); - local->op_ret = 0; + if (!dict || (op_ret < 0)) { + local->op_errno = op_errno; + goto cleanup; } + if (!local->dict) + local->dict = dict_copy_with_ref(dict, NULL); + else + dict_foreach(dict, afr_aggregate_stime_xattr, local->dict); + local->op_ret = 0; + } + cleanup: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - 
if (!callcnt) { - AFR_STACK_UNWIND (getxattr, frame, local->op_ret, - local->op_errno, local->dict, xdata); - } + if (!callcnt) { + AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, + local->dict, xdata); + } out: - return 0; + return 0; } - static gf_boolean_t -afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk, - gf_boolean_t is_fgetxattr) +afr_is_special_xattr(const char *name, fop_getxattr_cbk_t *cbk, + gf_boolean_t is_fgetxattr) { - gf_boolean_t is_spl = _gf_true; - - GF_ASSERT (cbk); - if (!cbk || !name) { - is_spl = _gf_false; - goto out; + gf_boolean_t is_spl = _gf_true; + + GF_ASSERT(cbk); + if (!cbk || !name) { + is_spl = _gf_false; + goto out; + } + + if (!strcmp(name, GF_XATTR_PATHINFO_KEY) || + !strcmp(name, GF_XATTR_USER_PATHINFO_KEY)) { + if (is_fgetxattr) { + *cbk = afr_fgetxattr_pathinfo_cbk; + } else { + *cbk = afr_getxattr_pathinfo_cbk; } - - if (!strcmp (name, GF_XATTR_PATHINFO_KEY) || - !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) { - if (is_fgetxattr) { - *cbk = afr_fgetxattr_pathinfo_cbk; - } else { - *cbk = afr_getxattr_pathinfo_cbk; - } - } else if (!strncmp (name, GF_XATTR_CLRLK_CMD, - SLEN (GF_XATTR_CLRLK_CMD))) { - if (is_fgetxattr) { - *cbk = afr_fgetxattr_clrlk_cbk; - } else { - *cbk = afr_getxattr_clrlk_cbk; - } - } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY, - SLEN (GF_XATTR_LOCKINFO_KEY))) { - if (is_fgetxattr) { - *cbk = afr_fgetxattr_lockinfo_cbk; - } else { - *cbk = afr_getxattr_lockinfo_cbk; - } - } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) { - *cbk = afr_common_getxattr_stime_cbk; - } else if (strcmp (name, QUOTA_SIZE_KEY) == 0) { - *cbk = afr_getxattr_quota_size_cbk; - } else if (!strcmp (name, GF_XATTR_LIST_NODE_UUIDS_KEY)) { - *cbk = afr_getxattr_list_node_uuids_cbk; + } else if (!strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD))) { + if (is_fgetxattr) { + *cbk = afr_fgetxattr_clrlk_cbk; + } else { + *cbk = afr_getxattr_clrlk_cbk; + } + } else if (!strncmp(name, GF_XATTR_LOCKINFO_KEY, + SLEN(GF_XATTR_LOCKINFO_KEY))) { + if (is_fgetxattr) { + *cbk = afr_fgetxattr_lockinfo_cbk; } else { - is_spl = _gf_false; + *cbk = afr_getxattr_lockinfo_cbk; } + } else if (fnmatch(GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) { + *cbk = afr_common_getxattr_stime_cbk; + } else if (strcmp(name, QUOTA_SIZE_KEY) == 0) { + *cbk = afr_getxattr_quota_size_cbk; + } else if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) { + *cbk = afr_getxattr_list_node_uuids_cbk; + } else { + is_spl = _gf_false; + } out: - return is_spl; + return is_spl; } static void -afr_getxattr_all_subvols (xlator_t *this, call_frame_t *frame, - const char *name, loc_t *loc, - fop_getxattr_cbk_t cbk) +afr_getxattr_all_subvols(xlator_t *this, call_frame_t *frame, const char *name, + loc_t *loc, fop_getxattr_cbk_t cbk) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int call_count = 0; - - priv = this->private; - - local = frame->local; - //local->call_count set in afr_local_init - call_count = local->call_count; - - if (!strcmp (name, GF_XATTR_LIST_NODE_UUIDS_KEY)) { - GF_FREE (local->cont.getxattr.name); - local->cont.getxattr.name = gf_strdup (GF_XATTR_NODE_UUID_KEY); + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; + int call_count = 0; + + priv = this->private; + + local = frame->local; + // local->call_count set in afr_local_init + call_count = local->call_count; + + if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) { + GF_FREE(local->cont.getxattr.name); + 
local->cont.getxattr.name = gf_strdup(GF_XATTR_NODE_UUID_KEY); + } + + // If up-children count is 0, afr_local_init would have failed already + // and the call would have unwound so not handling it here. + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->getxattr, loc, + local->cont.getxattr.name, NULL); + if (!--call_count) + break; } - - //If up-children count is 0, afr_local_init would have failed already - //and the call would have unwound so not handling it here. - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->getxattr, - loc, local->cont.getxattr.name, - NULL); - if (!--call_count) - break; - } - } - return; + } + return; } int -afr_marker_populate_args (call_frame_t *frame, int type, int *gauge, - xlator_t **subvols) +afr_marker_populate_args(call_frame_t *frame, int type, int *gauge, + xlator_t **subvols) { - xlator_t *this = frame->this; - afr_private_t *priv = this->private; + xlator_t *this = frame->this; + afr_private_t *priv = this->private; - memcpy (subvols, priv->children, sizeof (*subvols) * priv->child_count); + memcpy(subvols, priv->children, sizeof(*subvols) * priv->child_count); - if (type == MARKER_XTIME_TYPE) { - /*Don't error out on ENOENT/ENOTCONN */ - gauge[MCNT_NOTFOUND] = 0; - gauge[MCNT_ENOTCONN] = 0; - } - return priv->child_count; + if (type == MARKER_XTIME_TYPE) { + /*Don't error out on ENOENT/ENOTCONN */ + gauge[MCNT_NOTFOUND] = 0; + gauge[MCNT_ENOTCONN] = 0; + } + return priv->child_count; } static int -afr_handle_heal_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *heal_op) +afr_handle_heal_xattrs(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *heal_op) { - int ret = -1; - afr_spb_status_t *data = NULL; + int ret = -1; + afr_spb_status_t *data = NULL; - if (!strcmp (heal_op, GF_HEAL_INFO)) { - afr_get_heal_info (frame, this, loc); - ret = 0; - goto out; - } + if (!strcmp(heal_op, GF_HEAL_INFO)) { + afr_get_heal_info(frame, this, loc); + ret = 0; + goto out; + } - if (!strcmp (heal_op, GF_AFR_HEAL_SBRAIN)) { - afr_heal_splitbrain_file (frame, this, loc); - ret = 0; - goto out; + if (!strcmp(heal_op, GF_AFR_HEAL_SBRAIN)) { + afr_heal_splitbrain_file(frame, this, loc); + ret = 0; + goto out; + } + + if (!strcmp(heal_op, GF_AFR_SBRAIN_STATUS)) { + data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spb_status_t); + if (!data) { + ret = 1; + goto out; } - - if (!strcmp (heal_op, GF_AFR_SBRAIN_STATUS)) { - data = GF_CALLOC (1, sizeof (*data), gf_afr_mt_spb_status_t); - if (!data) { - ret = 1; - goto out; - } - data->frame = frame; - data->loc = loc; - ret = synctask_new (this->ctx->env, - afr_get_split_brain_status, - afr_get_split_brain_status_cbk, - NULL, data); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN_STATUS, - "Failed to create" - " synctask. Unable to fetch split-brain status" - " for %s.", loc->name); - ret = 1; - goto out; - } - goto out; + data->frame = frame; + data->loc = loc; + ret = synctask_new(this->ctx->env, afr_get_split_brain_status, + afr_get_split_brain_status_cbk, NULL, data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS, + "Failed to create" + " synctask. 
Unable to fetch split-brain status" + " for %s.", + loc->name); + ret = 1; + goto out; } + goto out; + } out: - if (ret == 1) { - AFR_STACK_UNWIND (getxattr, frame, -1, ENOMEM, NULL, NULL); - if (data) - GF_FREE (data); - ret = 0; - } - return ret; + if (ret == 1) { + AFR_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL); + if (data) + GF_FREE(data); + ret = 0; + } + return ret; } int32_t -afr_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +afr_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - xlator_t **children = NULL; - int i = 0; - int32_t op_errno = 0; - int ret = -1; - fop_getxattr_cbk_t cbk = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + xlator_t **children = NULL; + int i = 0; + int32_t op_errno = 0; + int ret = -1; + fop_getxattr_cbk_t cbk = NULL; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + priv = this->private; - priv = this->private; + children = priv->children; - children = priv->children; + loc_copy(&local->loc, loc); - loc_copy (&local->loc, loc); + local->op = GF_FOP_GETXATTR; - local->op = GF_FOP_GETXATTR; + if (xdata) + local->xdata_req = dict_ref(xdata); - if (xdata) - local->xdata_req = dict_ref (xdata); + if (!name) + goto no_name; - if (!name) - goto no_name; + local->cont.getxattr.name = gf_strdup(name); - local->cont.getxattr.name = gf_strdup (name); + if (!local->cont.getxattr.name) { + op_errno = ENOMEM; + goto out; + } - if (!local->cont.getxattr.name) { - op_errno = ENOMEM; - goto out; - } - - if (!strncmp (name, AFR_XATTR_PREFIX, - SLEN (AFR_XATTR_PREFIX))) { - op_errno = ENODATA; - goto out; - } + if (!strncmp(name, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) { + op_errno = ENODATA; + goto out; + } - if (cluster_handle_marker_getxattr (frame, loc, name, priv->vol_uuid, - afr_getxattr_unwind, - afr_marker_populate_args) == 0) - return 0; - - ret = afr_handle_heal_xattrs (frame, this, &local->loc, name); - if (ret == 0) - return 0; + if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid, + afr_getxattr_unwind, + afr_marker_populate_args) == 0) + return 0; - /* - * Heal daemons don't have IO threads ... and as a result they - * send this getxattr down and eventually crash :( - */ - op_errno = -1; - GF_CHECK_XATTR_KEY_AND_GOTO(name, IO_THREADS_QUEUE_SIZE_KEY, op_errno, - out); + ret = afr_handle_heal_xattrs(frame, this, &local->loc, name); + if (ret == 0) + return 0; - /* - * Special xattrs which need responses from all subvols - */ - if (afr_is_special_xattr (name, &cbk, 0)) { - afr_getxattr_all_subvols (this, frame, name, loc, cbk); - return 0; - } + /* + * Heal daemons don't have IO threads ... 
and as a result they + * send this getxattr down and eventually crash :( + */ + op_errno = -1; + GF_CHECK_XATTR_KEY_AND_GOTO(name, IO_THREADS_QUEUE_SIZE_KEY, op_errno, out); + + /* + * Special xattrs which need responses from all subvols + */ + if (afr_is_special_xattr(name, &cbk, 0)) { + afr_getxattr_all_subvols(this, frame, name, loc, cbk); + return 0; + } - if (XATTR_IS_NODE_UUID (name)) { - i = 0; - STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk, - (void *) (long) i, - children[i], - children[i]->fops->getxattr, - loc, name, xdata); - return 0; - } + if (XATTR_IS_NODE_UUID(name)) { + i = 0; + STACK_WIND_COOKIE(frame, afr_getxattr_node_uuid_cbk, (void *)(long)i, + children[i], children[i]->fops->getxattr, loc, name, + xdata); + return 0; + } no_name: - afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind, - AFR_METADATA_TRANSACTION); + afr_read_txn(frame, this, local->loc.inode, afr_getxattr_wind, + AFR_METADATA_TRANSACTION); - ret = 0; + ret = 0; out: - if (ret < 0) - AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); - return 0; + if (ret < 0) + AFR_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL); + return 0; } /* {{{ fgetxattr */ - int32_t -afr_fgetxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = op_errno; + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - if (dict) - afr_filter_xattrs (dict); + if (dict) + afr_filter_xattrs(dict); - AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata); + AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + return 0; } int -afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_fgetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; - - if (subvol == -1) { - AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret, - local->op_errno, NULL, NULL); - return 0; - } - - STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->fgetxattr, - local->fd, local->cont.getxattr.name, - local->xdata_req); - return 0; -} + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + local = frame->local; + priv = this->private; -static void -afr_fgetxattr_all_subvols (xlator_t *this, call_frame_t *frame, - fop_fgetxattr_cbk_t cbk) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int call_count = 0; + if (subvol == -1) { + AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno, NULL, + NULL); + return 0; + } - priv = this->private; + STACK_WIND_COOKIE(frame, afr_fgetxattr_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fgetxattr, local->fd, + local->cont.getxattr.name, local->xdata_req); + return 0; +} - local = frame->local; - //local->call_count set in afr_local_init - call_count = local->call_count; - - //If up-children count is 0, afr_local_init would have failed already - //and 
the call would have unwound so not handling it here. - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fgetxattr, - local->fd, local->cont.getxattr.name, - NULL); - if (!--call_count) - break; - } +static void +afr_fgetxattr_all_subvols(xlator_t *this, call_frame_t *frame, + fop_fgetxattr_cbk_t cbk) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; + int call_count = 0; + + priv = this->private; + + local = frame->local; + // local->call_count set in afr_local_init + call_count = local->call_count; + + // If up-children count is 0, afr_local_init would have failed already + // and the call would have unwound so not handling it here. + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i], + priv->children[i]->fops->fgetxattr, local->fd, + local->cont.getxattr.name, NULL); + if (!--call_count) + break; } + } - return; + return; } - int -afr_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) +afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) { - afr_local_t *local = NULL; - int32_t op_errno = 0; - fop_fgetxattr_cbk_t cbk = NULL; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_FGETXATTR; - local->fd = fd_ref (fd); - if (name) { - local->cont.getxattr.name = gf_strdup (name); - if (!local->cont.getxattr.name) { - op_errno = ENOMEM; - goto out; - } - } - if (xdata) - local->xdata_req = dict_ref (xdata); - - /* pathinfo gets handled only in getxattr(), but we need to handle - * lockinfo. - * If we are doing fgetxattr with lockinfo as the key then we - * collect information from all children. - */ - if (afr_is_special_xattr (name, &cbk, 1)) { - afr_fgetxattr_all_subvols (this, frame, cbk); - return 0; + afr_local_t *local = NULL; + int32_t op_errno = 0; + fop_fgetxattr_cbk_t cbk = NULL; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->op = GF_FOP_FGETXATTR; + local->fd = fd_ref(fd); + if (name) { + local->cont.getxattr.name = gf_strdup(name); + if (!local->cont.getxattr.name) { + op_errno = ENOMEM; + goto out; } + } + if (xdata) + local->xdata_req = dict_ref(xdata); + + /* pathinfo gets handled only in getxattr(), but we need to handle + * lockinfo. + * If we are doing fgetxattr with lockinfo as the key then we + * collect information from all children. 
+ */ + if (afr_is_special_xattr(name, &cbk, 1)) { + afr_fgetxattr_all_subvols(this, frame, cbk); + return 0; + } - afr_fix_open (fd, this); + afr_fix_open(fd, this); - afr_read_txn (frame, this, fd->inode, afr_fgetxattr_wind, - AFR_METADATA_TRANSACTION); + afr_read_txn(frame, this, fd->inode, afr_fgetxattr_wind, + AFR_METADATA_TRANSACTION); - return 0; + return 0; out: - AFR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL); + AFR_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - /* }}} */ /* {{{ readv */ int -afr_readv_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, struct iatt *buf, - struct iobref *iobref, dict_t *xdata) +afr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *buf, struct iobref *iobref, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = op_errno; + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + afr_read_txn_continue(frame, this, (long)cookie); + return 0; + } - AFR_STACK_UNWIND (readv, frame, op_ret, op_errno, - vector, count, buf, iobref, xdata); - return 0; + AFR_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, buf, iobref, + xdata); + return 0; } - int -afr_readv_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_readv_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; - - if (subvol == -1) { - AFR_STACK_UNWIND (readv, frame, local->op_ret, local->op_errno, - 0, 0, 0, 0, 0); - return 0; - } - - STACK_WIND_COOKIE (frame, afr_readv_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->readv, - local->fd, local->cont.readv.size, - local->cont.readv.offset, local->cont.readv.flags, - local->xdata_req); - return 0; -} + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + if (subvol == -1) { + AFR_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, 0, 0, 0, + 0, 0); + return 0; + } + + STACK_WIND_COOKIE( + frame, afr_readv_cbk, (void *)(long)subvol, priv->children[subvol], + priv->children[subvol]->fops->readv, local->fd, local->cont.readv.size, + local->cont.readv.offset, local->cont.readv.flags, local->xdata_req); + return 0; +} int -afr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) +afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - afr_local_t * local = NULL; - int32_t op_errno = 0; + afr_local_t *local = NULL; + int32_t op_errno = 0; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_READ; - local->fd = fd_ref (fd); - local->cont.readv.size = size; - local->cont.readv.offset = offset; - local->cont.readv.flags = flags; - if (xdata) - local->xdata_req = dict_ref (xdata); + local->op = GF_FOP_READ; + local->fd = fd_ref(fd); + local->cont.readv.size = size; + local->cont.readv.offset = offset; + local->cont.readv.flags = flags; + if (xdata) + 
local->xdata_req = dict_ref(xdata); - afr_fix_open (fd, this); + afr_fix_open(fd, this); - afr_read_txn (frame, this, fd->inode, afr_readv_wind, - AFR_DATA_TRANSACTION); + afr_read_txn(frame, this, fd->inode, afr_readv_wind, AFR_DATA_TRANSACTION); - return 0; + return 0; out: - AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0); + AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0); - return 0; + return 0; } /* }}} */ @@ -1910,77 +1814,72 @@ out: /* {{{ seek */ int -afr_seek_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata) +afr_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, off_t offset, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = op_errno; - - afr_read_txn_continue (frame, this, (long) cookie); - return 0; - } + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; - AFR_STACK_UNWIND (seek, frame, op_ret, op_errno, offset, xdata); + afr_read_txn_continue(frame, this, (long)cookie); return 0; -} + } + AFR_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata); + return 0; +} int -afr_seek_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_seek_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; - - if (subvol == -1) { - AFR_STACK_UNWIND (seek, frame, local->op_ret, local->op_errno, - 0, NULL); - return 0; - } + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_seek_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->seek, - local->fd, local->cont.seek.offset, - local->cont.seek.what, local->xdata_req); + if (subvol == -1) { + AFR_STACK_UNWIND(seek, frame, local->op_ret, local->op_errno, 0, NULL); return 0; -} + } + STACK_WIND_COOKIE( + frame, afr_seek_cbk, (void *)(long)subvol, priv->children[subvol], + priv->children[subvol]->fops->seek, local->fd, local->cont.seek.offset, + local->cont.seek.what, local->xdata_req); + return 0; +} int -afr_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - gf_seek_what_t what, dict_t *xdata) +afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { - afr_local_t *local = NULL; - int32_t op_errno = 0; + afr_local_t *local = NULL; + int32_t op_errno = 0; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->op = GF_FOP_SEEK; - local->fd = fd_ref (fd); - local->cont.seek.offset = offset; - local->cont.seek.what = what; - if (xdata) - local->xdata_req = dict_ref (xdata); + local->op = GF_FOP_SEEK; + local->fd = fd_ref(fd); + local->cont.seek.offset = offset; + local->cont.seek.what = what; + if (xdata) + local->xdata_req = dict_ref(xdata); - afr_fix_open (fd, this); + afr_fix_open(fd, this); - afr_read_txn (frame, this, fd->inode, afr_seek_wind, - AFR_DATA_TRANSACTION); + afr_read_txn(frame, this, fd->inode, afr_seek_wind, AFR_DATA_TRANSACTION); - return 0; + return 0; out: - AFR_STACK_UNWIND (seek, frame, -1, op_errno, 0, NULL); + AFR_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL); - return 0; + return 0; } /* }}} */ diff --git 
a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 755e928ef62..f9e2b302f8d 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include #include #include @@ -35,771 +34,736 @@ #include "afr-messages.h" static void -__afr_inode_write_finalize (call_frame_t *frame, xlator_t *this) -{ - int i = 0; - int ret = 0; - int read_subvol = 0; - struct iatt *stbuf = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_read_subvol_args_t args = {0,}; - - local = frame->local; - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, local->inode, out); - - /*This code needs to stay till DHT sends fops on linked - * inodes*/ - if (!inode_is_linked (local->inode)) { - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret == -1) - continue; - if (!gf_uuid_is_null - (local->replies[i].poststat.ia_gfid)) { - gf_uuid_copy (args.gfid, - local->replies[i].poststat.ia_gfid); - args.ia_type = - local->replies[i].poststat.ia_type; - break; - } else { - ret = dict_get_bin (local->replies[i].xdata, - DHT_IATT_IN_XDATA_KEY, - (void **) &stbuf); - if (ret) - continue; - gf_uuid_copy (args.gfid, stbuf->ia_gfid); - args.ia_type = stbuf->ia_type; - break; - } - } +__afr_inode_write_finalize(call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int ret = 0; + int read_subvol = 0; + struct iatt *stbuf = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_read_subvol_args_t args = { + 0, + }; + + local = frame->local; + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, local->inode, out); + + /*This code needs to stay till DHT sends fops on linked + * inodes*/ + if (!inode_is_linked(local->inode)) { + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret == -1) + continue; + if (!gf_uuid_is_null(local->replies[i].poststat.ia_gfid)) { + gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid); + args.ia_type = local->replies[i].poststat.ia_type; + break; + } else { + ret = dict_get_bin(local->replies[i].xdata, + DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); + if (ret) + continue; + gf_uuid_copy(args.gfid, stbuf->ia_gfid); + args.ia_type = stbuf->ia_type; + break; + } } - - if (local->transaction.type == AFR_METADATA_TRANSACTION) { - read_subvol = afr_metadata_subvol_get (local->inode, this, - NULL, local->readable, - NULL, &args); - } else { - read_subvol = afr_data_subvol_get (local->inode, this, - NULL, local->readable, - NULL, &args); + } + + if (local->transaction.type == AFR_METADATA_TRANSACTION) { + read_subvol = afr_metadata_subvol_get(local->inode, this, NULL, + local->readable, NULL, &args); + } else { + read_subvol = afr_data_subvol_get(local->inode, this, NULL, + local->readable, NULL, &args); + } + + local->op_ret = -1; + local->op_errno = afr_final_errno(local, priv); + afr_pick_error_xdata(local, priv, local->inode, local->readable, NULL, + NULL); + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret < 0) + continue; + + /* Order of checks in the compound conditional + below is important. 
+ + - Highest precedence: largest op_ret + - Next precedence: if all op_rets are equal, read subvol + - Least precedence: any succeeded subvol + */ + if ((local->op_ret < local->replies[i].op_ret) || + ((local->op_ret == local->replies[i].op_ret) && + (i == read_subvol))) { + local->op_ret = local->replies[i].op_ret; + local->op_errno = local->replies[i].op_errno; + + local->cont.inode_wfop.prebuf = local->replies[i].prestat; + local->cont.inode_wfop.postbuf = local->replies[i].poststat; + + if (local->replies[i].xdata) { + if (local->xdata_rsp) + dict_unref(local->xdata_rsp); + local->xdata_rsp = dict_ref(local->replies[i].xdata); + } + if (local->replies[i].xattr) { + if (local->xattr_rsp) + dict_unref(local->xattr_rsp); + local->xattr_rsp = dict_ref(local->replies[i].xattr); + } } + } - local->op_ret = -1; - local->op_errno = afr_final_errno (local, priv); - afr_pick_error_xdata (local, priv, local->inode, local->readable, NULL, - NULL); - - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret < 0) - continue; - - /* Order of checks in the compound conditional - below is important. - - - Highest precedence: largest op_ret - - Next precedence: if all op_rets are equal, read subvol - - Least precedence: any succeeded subvol - */ - if ((local->op_ret < local->replies[i].op_ret) || - ((local->op_ret == local->replies[i].op_ret) && - (i == read_subvol))) { - - local->op_ret = local->replies[i].op_ret; - local->op_errno = local->replies[i].op_errno; - - local->cont.inode_wfop.prebuf = - local->replies[i].prestat; - local->cont.inode_wfop.postbuf = - local->replies[i].poststat; - - if (local->replies[i].xdata) { - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); - local->xdata_rsp = - dict_ref (local->replies[i].xdata); - } - if (local->replies[i].xattr) { - if (local->xattr_rsp) - dict_unref (local->xattr_rsp); - local->xattr_rsp = - dict_ref (local->replies[i].xattr); - } - } - } - - afr_set_in_flight_sb_status (this, frame, local->inode); + afr_set_in_flight_sb_status(this, frame, local->inode); out: - return; + return; } - static void -__afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, - int op_ret, int op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xattr, dict_t *xdata) +__afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - local->replies[child_index].valid = 1; + local->replies[child_index].valid = 1; - if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1) - op_ret = iov_length (local->cont.writev.vector, - local->cont.writev.count); + if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1) + op_ret = iov_length(local->cont.writev.vector, + local->cont.writev.count); - local->replies[child_index].op_ret = op_ret; - local->replies[child_index].op_errno = op_errno; - if (xdata) - local->replies[child_index].xdata = dict_ref (xdata); + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; + if (xdata) + local->replies[child_index].xdata = dict_ref(xdata); - if (op_ret >= 0) { - if (prebuf) - local->replies[child_index].prestat = *prebuf; - if (postbuf) - 
local->replies[child_index].poststat = *postbuf; - if (xattr) - local->replies[child_index].xattr = dict_ref (xattr); - } else { - afr_transaction_fop_failed (frame, this, child_index); - } + if (op_ret >= 0) { + if (prebuf) + local->replies[child_index].prestat = *prebuf; + if (postbuf) + local->replies[child_index].poststat = *postbuf; + if (xattr) + local->replies[child_index].xattr = dict_ref(xattr); + } else { + afr_transaction_fop_failed(frame, this, child_index); + } - return; + return; } - static int -__afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xattr, dict_t *xdata) +__afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; - int child_index = (long) cookie; - int call_count = -1; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int child_index = (long)cookie; + int call_count = -1; + afr_private_t *priv = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - LOCK (&frame->lock); - { - __afr_inode_write_fill (frame, this, child_index, op_ret, - op_errno, prebuf, postbuf, xattr, - xdata); - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, + prebuf, postbuf, xattr, xdata); + } + UNLOCK(&frame->lock); - call_count = afr_frame_return (frame); + call_count = afr_frame_return(frame); - if (call_count == 0) { - __afr_inode_write_finalize (frame, this); + if (call_count == 0) { + __afr_inode_write_finalize(frame, this); - if (afr_txn_nothing_failed (frame, this)) { - /*if it did pre-op, it will do post-op changing ctime*/ - if (priv->consistent_metadata && - afr_needs_changelog_update (local)) - afr_zero_fill_stat (local); - local->transaction.unwind (frame, this); - } - - afr_transaction_resume (frame, this); + if (afr_txn_nothing_failed(frame, this)) { + /*if it did pre-op, it will do post-op changing ctime*/ + if (priv->consistent_metadata && afr_needs_changelog_update(local)) + afr_zero_fill_stat(local); + local->transaction.unwind(frame, this); } - return 0; + afr_transaction_resume(frame, this); + } + + return 0; } /* {{{ writev */ void -afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame) +afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame) { - afr_local_t *src_local = NULL; - afr_local_t *dst_local = NULL; + afr_local_t *src_local = NULL; + afr_local_t *dst_local = NULL; - src_local = src_frame->local; - dst_local = dst_frame->local; + src_local = src_frame->local; + dst_local = dst_frame->local; - dst_local->op_ret = src_local->op_ret; - dst_local->op_errno = src_local->op_errno; - dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf; - dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf; - if (src_local->xdata_rsp) - dst_local->xdata_rsp = dict_ref (src_local->xdata_rsp); + dst_local->op_ret = src_local->op_ret; + dst_local->op_errno = src_local->op_errno; + dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf; + dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf; + if (src_local->xdata_rsp) + dst_local->xdata_rsp = dict_ref(src_local->xdata_rsp); } void -afr_writev_unwind (call_frame_t *frame, xlator_t *this) +afr_writev_unwind(call_frame_t *frame, xlator_t 
*this) { - afr_local_t * local = NULL; - afr_private_t *priv = this->private; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; - local = frame->local; + local = frame->local; - if (priv->consistent_metadata) - afr_zero_fill_stat (local); + if (priv->consistent_metadata) + afr_zero_fill_stat(local); - AFR_STACK_UNWIND (writev, frame, - local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, - local->xdata_rsp); + AFR_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); } - int -afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this) +afr_transaction_writev_unwind(call_frame_t *frame, xlator_t *this) { - call_frame_t *fop_frame = NULL; + call_frame_t *fop_frame = NULL; - fop_frame = afr_transaction_detach_fop_frame (frame); + fop_frame = afr_transaction_detach_fop_frame(frame); - if (fop_frame) { - afr_writev_copy_outvars (frame, fop_frame); - afr_writev_unwind (fop_frame, this); - } - return 0; + if (fop_frame) { + afr_writev_copy_outvars(frame, fop_frame); + afr_writev_unwind(fop_frame, this); + } + return 0; } static void -afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - - local = frame->local; - priv = this->private; - /* - * We already have the best case result of the writev calls staged - * as the return value. Any writev that returns some value less - * than the best case is now out of sync, so mark the fop as - * failed. Note that fops that have returned with errors have - * already been marked as failed. - */ - for (i = 0; i < priv->child_count; i++) { - if ((!local->replies[i].valid) || - (local->replies[i].op_ret == -1)) - continue; - - if (local->replies[i].op_ret < local->op_ret) - afr_transaction_fop_failed (frame, this, i); - } +afr_writev_handle_short_writes(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + + local = frame->local; + priv = this->private; + /* + * We already have the best case result of the writev calls staged + * as the return value. Any writev that returns some value less + * than the best case is now out of sync, so mark the fop as + * failed. Note that fops that have returned with errors have + * already been marked as failed. 
+ */ + for (i = 0; i < priv->child_count; i++) { + if ((!local->replies[i].valid) || (local->replies[i].op_ret == -1)) + continue; + + if (local->replies[i].op_ret < local->op_ret) + afr_transaction_fop_failed(frame, this, i); + } } void -afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, +afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int ret = 0; - afr_local_t *local = frame->local; - uint32_t open_fd_count = 0; - uint32_t write_is_append = 0; - - LOCK (&frame->lock); - { - __afr_inode_write_fill (frame, this, child_index, op_ret, - op_errno, prebuf, postbuf, NULL, xdata); - if (op_ret == -1 || !xdata) - goto unlock; - - write_is_append = 0; - ret = dict_get_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND, - &write_is_append); - if (ret || !write_is_append) - local->append_write = _gf_false; - - ret = dict_get_uint32 (xdata, GLUSTERFS_ACTIVE_FD_COUNT, - &open_fd_count); - if (ret < 0) - goto unlock; - if (open_fd_count > local->open_fd_count) { - local->open_fd_count = open_fd_count; - local->update_open_fd_count = _gf_true; - } + int ret = 0; + afr_local_t *local = frame->local; + uint32_t open_fd_count = 0; + uint32_t write_is_append = 0; + + LOCK(&frame->lock); + { + __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, + prebuf, postbuf, NULL, xdata); + if (op_ret == -1 || !xdata) + goto unlock; + + write_is_append = 0; + ret = dict_get_uint32(xdata, GLUSTERFS_WRITE_IS_APPEND, + &write_is_append); + if (ret || !write_is_append) + local->append_write = _gf_false; + + ret = dict_get_uint32(xdata, GLUSTERFS_ACTIVE_FD_COUNT, &open_fd_count); + if (ret < 0) + goto unlock; + if (open_fd_count > local->open_fd_count) { + local->open_fd_count = open_fd_count; + local->update_open_fd_count = _gf_true; } + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); } void -afr_process_post_writev (call_frame_t *frame, xlator_t *this) +afr_process_post_writev(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (!local->stable_write && !local->append_write) - /* An appended write removes the necessity to - fsync() the file. This is because self-heal - has the logic to check for larger file when - the xattrs are not reliably pointing at - a stale file. - */ - afr_fd_report_unstable_write (this, local); + if (!local->stable_write && !local->append_write) + /* An appended write removes the necessity to + fsync() the file. This is because self-heal + has the logic to check for larger file when + the xattrs are not reliably pointing at + a stale file. 
+ */ + afr_fd_report_unstable_write(this, local); - __afr_inode_write_finalize (frame, this); + __afr_inode_write_finalize(frame, this); - afr_writev_handle_short_writes (frame, this); - - if (local->update_open_fd_count) - local->inode_ctx->open_fd_count = local->open_fd_count; + afr_writev_handle_short_writes(frame, this); + if (local->update_open_fd_count) + local->inode_ctx->open_fd_count = local->open_fd_count; } int -afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_writev_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - call_frame_t *fop_frame = NULL; - int child_index = (long) cookie; - int call_count = -1; - - afr_inode_write_fill (frame, this, child_index, op_ret, op_errno, - prebuf, postbuf, xdata); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - afr_process_post_writev (frame, this); - - if (!afr_txn_nothing_failed (frame, this)) { - //Don't unwind until post-op is complete - afr_transaction_resume (frame, this); - } else { - /* - * Generally inode-write fops do transaction.unwind then - * transaction.resume, but writev needs to make sure that - * delayed post-op frame is placed in fdctx before unwind - * happens. This prevents the race of flush doing the - * changelog wakeup first in fuse thread and then this - * writev placing its delayed post-op frame in fdctx. - * This helps flush make sure all the delayed post-ops are - * completed. - */ - - fop_frame = afr_transaction_detach_fop_frame (frame); - afr_writev_copy_outvars (frame, fop_frame); - afr_transaction_resume (frame, this); - afr_writev_unwind (fop_frame, this); - } + call_frame_t *fop_frame = NULL; + int child_index = (long)cookie; + int call_count = -1; + + afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, prebuf, + postbuf, xdata); + + call_count = afr_frame_return(frame); + + if (call_count == 0) { + afr_process_post_writev(frame, this); + + if (!afr_txn_nothing_failed(frame, this)) { + // Don't unwind until post-op is complete + afr_transaction_resume(frame, this); + } else { + /* + * Generally inode-write fops do transaction.unwind then + * transaction.resume, but writev needs to make sure that + * delayed post-op frame is placed in fdctx before unwind + * happens. This prevents the race of flush doing the + * changelog wakeup first in fuse thread and then this + * writev placing its delayed post-op frame in fdctx. + * This helps flush make sure all the delayed post-ops are + * completed. 
+ */ + + fop_frame = afr_transaction_detach_fop_frame(frame); + afr_writev_copy_outvars(frame, fop_frame); + afr_transaction_resume(frame, this); + afr_writev_unwind(fop_frame, this); } - return 0; + } + return 0; } static int -afr_arbiter_writev_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_arbiter_writev_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = frame->local; - afr_private_t *priv = this->private; - static char byte = 0xFF; - static struct iovec vector = {&byte, 1}; - int32_t count = 1; + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + static char byte = 0xFF; + static struct iovec vector = {&byte, 1}; + int32_t count = 1; - STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->writev, - local->fd, &vector, count, local->cont.writev.offset, - local->cont.writev.flags, local->cont.writev.iobref, - local->xdata_req); + STACK_WIND_COOKIE( + frame, afr_writev_wind_cbk, (void *)(long)subvol, + priv->children[subvol], priv->children[subvol]->fops->writev, local->fd, + &vector, count, local->cont.writev.offset, local->cont.writev.flags, + local->cont.writev.iobref, local->xdata_req); - return 0; + return 0; } int -afr_writev_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_writev_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; - - if (AFR_IS_ARBITER_BRICK(priv, subvol)) { - afr_arbiter_writev_wind (frame, this, subvol); - return 0; - } + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->writev, - local->fd, local->cont.writev.vector, - local->cont.writev.count, local->cont.writev.offset, - local->cont.writev.flags, local->cont.writev.iobref, - local->xdata_req); + if (AFR_IS_ARBITER_BRICK(priv, subvol)) { + afr_arbiter_writev_wind(frame, this, subvol); return 0; -} + } + STACK_WIND_COOKIE(frame, afr_writev_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->writev, local->fd, + local->cont.writev.vector, local->cont.writev.count, + local->cont.writev.offset, local->cont.writev.flags, + local->cont.writev.iobref, local->xdata_req); + return 0; +} int -afr_do_writev (call_frame_t *frame, xlator_t *this) +afr_do_writev(call_frame_t *frame, xlator_t *this) { - call_frame_t *transaction_frame = NULL; - afr_local_t *local = NULL; - int ret = -1; - int op_errno = ENOMEM; - - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; - - local = frame->local; - transaction_frame->local = local; - frame->local = NULL; + call_frame_t *transaction_frame = NULL; + afr_local_t *local = NULL; + int ret = -1; + int op_errno = ENOMEM; - if (!AFR_FRAME_INIT (frame, op_errno)) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local->op = GF_FOP_WRITE; + local = frame->local; + transaction_frame->local = local; + frame->local = NULL; - local->transaction.wind = afr_writev_wind; - local->transaction.unwind = afr_transaction_writev_unwind; + if (!AFR_FRAME_INIT(frame, op_errno)) + goto out; - local->transaction.main_frame = frame; + local->op = GF_FOP_WRITE; - if (local->fd->flags & O_APPEND) { - /* - * Backend vfs ignores the 'offset' for append mode fd so - 
* locking just the region provided for the writev does not - * give consistency guarantee. The actual write may happen at a - * completely different range than the one provided by the - * offset, len in the fop. So lock the entire file. - */ - local->transaction.start = 0; - local->transaction.len = 0; - } else { - local->transaction.start = local->cont.writev.offset; - local->transaction.len = iov_length (local->cont.writev.vector, - local->cont.writev.count); - } + local->transaction.wind = afr_writev_wind; + local->transaction.unwind = afr_transaction_writev_unwind; - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + local->transaction.main_frame = frame; - return 0; + if (local->fd->flags & O_APPEND) { + /* + * Backend vfs ignores the 'offset' for append mode fd so + * locking just the region provided for the writev does not + * give consistency guarantee. The actual write may happen at a + * completely different range than the one provided by the + * offset, len in the fop. So lock the entire file. + */ + local->transaction.start = 0; + local->transaction.len = 0; + } else { + local->transaction.start = local->cont.writev.offset; + local->transaction.len = iov_length(local->cont.writev.vector, + local->cont.writev.count); + } + + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - int -afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) +afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - afr_local_t *local = NULL; - int op_errno = ENOMEM; - int ret = -1; + afr_local_t *local = NULL; + int op_errno = ENOMEM; + int ret = -1; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->cont.writev.vector = iov_dup (vector, count); - if (!local->cont.writev.vector) - goto out; - local->cont.writev.count = count; - local->cont.writev.offset = offset; - local->cont.writev.flags = flags; - local->cont.writev.iobref = iobref_ref (iobref); + local->cont.writev.vector = iov_dup(vector, count); + if (!local->cont.writev.vector) + goto out; + local->cont.writev.count = count; + local->cont.writev.offset = offset; + local->cont.writev.flags = flags; + local->cont.writev.iobref = iobref_ref(iobref); - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - if (dict_set_uint32 (local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) { - op_errno = ENOMEM; - goto out; - } + if 
(dict_set_uint32(local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) { + op_errno = ENOMEM; + goto out; + } - if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) { - op_errno = ENOMEM; - goto out; - } + if (dict_set_uint32(local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) { + op_errno = ENOMEM; + goto out; + } - /* Set append_write to be true speculatively. If on any - server it turns not be true, we unset it in the - callback. - */ - local->append_write = _gf_true; + /* Set append_write to be true speculatively. If on any + server it turns not be true, we unset it in the + callback. + */ + local->append_write = _gf_true; - /* detect here, but set it in writev_wind_cbk *after* the unstable - write is performed - */ - local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC)); + /* detect here, but set it in writev_wind_cbk *after* the unstable + write is performed + */ + local->stable_write = !!((fd->flags | flags) & (O_SYNC | O_DSYNC)); - afr_fix_open (fd, this); + afr_fix_open(fd, this); - afr_do_writev (frame, this); + afr_do_writev(frame, this); - return 0; + return 0; out: - AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); + AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - /* }}} */ /* {{{ truncate */ int -afr_truncate_unwind (call_frame_t *frame, xlator_t *this) +afr_truncate_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(truncate, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_truncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size) - local->stable_write = _gf_false; + if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size) + local->stable_write = _gf_false; - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); } - int -afr_truncate_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_truncate_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->truncate, - &local->loc, local->cont.truncate.offset, - local->xdata_req); - return 0; + 
STACK_WIND_COOKIE(frame, afr_truncate_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->truncate, &local->loc, + local->cont.truncate.offset, local->xdata_req); + return 0; } - int -afr_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset, dict_t *xdata) +afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - afr_local_t * local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.truncate.offset = offset; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + local->cont.truncate.offset = offset; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->transaction.wind = afr_truncate_wind; - local->transaction.unwind = afr_truncate_unwind; + local->transaction.wind = afr_truncate_wind; + local->transaction.unwind = afr_truncate_unwind; - loc_copy (&local->loc, loc); - ret = afr_set_inode_local (this, local, loc->inode); - if (ret) - goto out; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - local->op = GF_FOP_TRUNCATE; + local->op = GF_FOP_TRUNCATE; - local->transaction.main_frame = frame; - local->transaction.start = offset; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = offset; + local->transaction.len = 0; - /* Set it true speculatively, will get reset in afr_truncate_wind_cbk - if truncate was not a NOP */ - local->stable_write = _gf_true; + /* Set it true speculatively, will get reset in afr_truncate_wind_cbk + if truncate was not a NOP */ + local->stable_write = _gf_true; - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - /* }}} */ /* {{{ ftruncate */ - int -afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this) +afr_ftruncate_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + 
AFR_STACK_UNWIND(ftruncate, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_ftruncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size) - local->stable_write = _gf_false; + if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size) + local->stable_write = _gf_false; - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); } - int -afr_ftruncate_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_ftruncate_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->ftruncate, - local->fd, local->cont.ftruncate.offset, - local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_ftruncate_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->ftruncate, local->fd, + local->cont.ftruncate.offset, local->xdata_req); + return 0; } - int -afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) +afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.ftruncate.offset = offset; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + local->cont.ftruncate.offset = offset; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - local->op = GF_FOP_FTRUNCATE; + local->op = GF_FOP_FTRUNCATE; - local->transaction.wind = afr_ftruncate_wind; - local->transaction.unwind = afr_ftruncate_unwind; + local->transaction.wind = afr_ftruncate_wind; + local->transaction.unwind = afr_ftruncate_unwind; - local->transaction.main_frame = frame; + local->transaction.main_frame = frame; - local->transaction.start = local->cont.ftruncate.offset; - 
local->transaction.len = 0; + local->transaction.start = local->cont.ftruncate.offset; + local->transaction.len = 0; - afr_fix_open (fd, this); + afr_fix_open(fd, this); - /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk - if truncate was not a NOP */ - local->stable_write = _gf_true; + /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk + if truncate was not a NOP */ + local->stable_write = _gf_true; - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); + AFR_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } /* }}} */ @@ -807,1813 +771,1743 @@ out: /* {{{ setattr */ int -afr_setattr_unwind (call_frame_t *frame, xlator_t *this) +afr_setattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; - - AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, - local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(setattr, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iatt *preop, struct iatt *postop, dict_t *xdata) +afr_setattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preop, + struct iatt *postop, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - preop, postop, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop, + postop, NULL, xdata); } - int -afr_setattr_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_setattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->setattr, - &local->loc, &local->cont.setattr.in_buf, - local->cont.setattr.valid, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_setattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->setattr, &local->loc, + &local->cont.setattr.in_buf, local->cont.setattr.valid, + local->xdata_req); + return 0; } - int -afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf, - int32_t valid, dict_t *xdata) +afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf, + int32_t valid, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; 
+ int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.setattr.in_buf = *buf; - local->cont.setattr.valid = valid; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + local->cont.setattr.in_buf = *buf; + local->cont.setattr.valid = valid; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->transaction.wind = afr_setattr_wind; - local->transaction.unwind = afr_setattr_unwind; + local->transaction.wind = afr_setattr_wind; + local->transaction.unwind = afr_setattr_unwind; - loc_copy (&local->loc, loc); - ret = afr_set_inode_local (this, local, loc->inode); - if (ret) - goto out; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - local->op = GF_FOP_SETATTR; + local->op = GF_FOP_SETATTR; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } /* {{{ fsetattr */ int -afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this) +afr_fsetattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(fsetattr, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, dict_t *xdata) +afr_fsetattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preop, + struct iatt *postop, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - preop, postop, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop, + postop, NULL, xdata); } - int -afr_fsetattr_wind (call_frame_t *frame, xlator_t *this, int subvol) 
+afr_fsetattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->fsetattr, - local->fd, &local->cont.fsetattr.in_buf, - local->cont.fsetattr.valid, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_fsetattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fsetattr, local->fd, + &local->cont.fsetattr.in_buf, local->cont.fsetattr.valid, + local->xdata_req); + return 0; } - int -afr_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata) +afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf, + int32_t valid, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.fsetattr.in_buf = *buf; - local->cont.fsetattr.valid = valid; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + local->cont.fsetattr.in_buf = *buf; + local->cont.fsetattr.valid = valid; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->transaction.wind = afr_fsetattr_wind; - local->transaction.unwind = afr_fsetattr_unwind; + local->transaction.wind = afr_fsetattr_wind; + local->transaction.unwind = afr_fsetattr_unwind; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - local->op = GF_FOP_FSETATTR; + local->op = GF_FOP_FSETATTR; - afr_fix_open (fd, this); + afr_fix_open(fd, this); - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - /* {{{ setxattr */ - int -afr_setxattr_unwind (call_frame_t *frame, xlator_t *this) +afr_setxattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = 
frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (setxattr, main_frame, local->op_ret, local->op_errno, - local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(setxattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; +} int -afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_setxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - NULL, NULL, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); } - int -afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_setxattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->setxattr, - &local->loc, local->cont.setxattr.dict, - local->cont.setxattr.flags, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_setxattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->setxattr, &local->loc, + local->cont.setxattr.dict, local->cont.setxattr.flags, + local->xdata_req); + return 0; } int -afr_emptyb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - dict_t *xattr, dict_t *xdata) +afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i, ret = 0; - char *op_type = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i, ret = 0; + char *op_type = NULL; - local = frame->local; - priv = this->private; - i = (long) cookie; + local = frame->local; + priv = this->private; + i = (long)cookie; - local->replies[i].valid = 1; - local->replies[i].op_ret = op_ret; - local->replies[i].op_errno = op_errno; + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; - ret = dict_get_str (local->xdata_req, "replicate-brick-op", &op_type); - if (ret) - goto out; + ret = dict_get_str(local->xdata_req, "replicate-brick-op", &op_type); + if (ret) + goto out; - gf_msg (this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO, - op_ret ? op_errno : 0, - afr_get_msg_id (op_type), - "Set of pending xattr %s on" - " %s.", op_ret ? "failed" : "succeeded", - priv->children[i]->name); + gf_msg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO, + op_ret ? op_errno : 0, afr_get_msg_id(op_type), + "Set of pending xattr %s on" + " %s.", + op_ret ? 
"failed" : "succeeded", priv->children[i]->name); out: - syncbarrier_wake (&local->barrier); - return 0; + syncbarrier_wake(&local->barrier); + return 0; } int -afr_emptyb_set_pending_changelog (call_frame_t *frame, xlator_t *this, - unsigned char *locked_nodes) +afr_emptyb_set_pending_changelog(call_frame_t *frame, xlator_t *this, + unsigned char *locked_nodes) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int ret = 0, i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int ret = 0, i = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - AFR_ONLIST (locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk, - xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, - local->xattr_req, NULL); + AFR_ONLIST(locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk, + xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, local->xattr_req, + NULL); - /* It is sufficient if xattrop was successful on one child */ - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - - if (local->replies[i].op_ret == 0) { - ret = 0; - goto out; - } else { - ret = afr_higher_errno (ret, - local->replies[i].op_errno); - } + /* It is sufficient if xattrop was successful on one child */ + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + + if (local->replies[i].op_ret == 0) { + ret = 0; + goto out; + } else { + ret = afr_higher_errno(ret, local->replies[i].op_errno); } + } out: - return -ret; + return -ret; } int -_afr_handle_empty_brick_type (xlator_t *this, call_frame_t *frame, - loc_t *loc, int empty_index, - afr_transaction_type type, - char *op_type) +_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc, + int empty_index, afr_transaction_type type, + char *op_type) { - int count = 0; - int ret = -ENOMEM; - int idx = -1; - int d_idx = -1; - unsigned char *locked_nodes = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + int count = 0; + int ret = -ENOMEM; + int idx = -1; + int d_idx = -1; + unsigned char *locked_nodes = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - locked_nodes = alloca0 (priv->child_count); + locked_nodes = alloca0(priv->child_count); - idx = afr_index_for_transaction_type (type); - d_idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); + idx = afr_index_for_transaction_type(type); + d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION); - local->pending = afr_matrix_create (priv->child_count, - AFR_NUM_CHANGE_LOGS); - if (!local->pending) - goto out; + local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS); + if (!local->pending) + goto out; - local->pending[empty_index][idx] = hton32 (1); + local->pending[empty_index][idx] = hton32(1); - if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION)) - local->pending[empty_index][d_idx] = hton32 (1); + if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION)) + local->pending[empty_index][d_idx] = hton32(1); - local->xdata_req = dict_new (); - if (!local->xdata_req) - goto out; + local->xdata_req = dict_new(); + if (!local->xdata_req) + goto out; - ret = dict_set_str (local->xdata_req, "replicate-brick-op", op_type); - if (ret) - goto out; + ret = dict_set_str(local->xdata_req, "replicate-brick-op", op_type); + if (ret) + goto out; - local->xattr_req = dict_new (); - if (!local->xattr_req) - goto out; + local->xattr_req = 
dict_new(); + if (!local->xattr_req) + goto out; - ret = afr_set_pending_dict (priv, local->xattr_req, local->pending); - if (ret < 0) - goto out; + ret = afr_set_pending_dict(priv, local->xattr_req, local->pending); + if (ret < 0) + goto out; - if (AFR_ENTRY_TRANSACTION == type) { - count = afr_selfheal_entrylk (frame, this, loc->inode, - this->name, NULL, locked_nodes); - } else { - count = afr_selfheal_inodelk (frame, this, loc->inode, - this->name, LLONG_MAX - 1, 0, - locked_nodes); - } + if (AFR_ENTRY_TRANSACTION == type) { + count = afr_selfheal_entrylk(frame, this, loc->inode, this->name, NULL, + locked_nodes); + } else { + count = afr_selfheal_inodelk(frame, this, loc->inode, this->name, + LLONG_MAX - 1, 0, locked_nodes); + } - if (!count) { - gf_msg (this->name, GF_LOG_ERROR, EAGAIN, - AFR_MSG_REPLACE_BRICK_STATUS, "Couldn't acquire lock on" - " any child."); - ret = -EAGAIN; - goto unlock; - } + if (!count) { + gf_msg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS, + "Couldn't acquire lock on" + " any child."); + ret = -EAGAIN; + goto unlock; + } - ret = afr_emptyb_set_pending_changelog (frame, this, locked_nodes); - if (ret) - goto unlock; - ret = 0; + ret = afr_emptyb_set_pending_changelog(frame, this, locked_nodes); + if (ret) + goto unlock; + ret = 0; unlock: - if (AFR_ENTRY_TRANSACTION == type) { - afr_selfheal_unentrylk (frame, this, loc->inode, this->name, - NULL, locked_nodes, NULL); - } else { - afr_selfheal_uninodelk (frame, this, loc->inode, this->name, - LLONG_MAX - 1, 0, locked_nodes); - } + if (AFR_ENTRY_TRANSACTION == type) { + afr_selfheal_unentrylk(frame, this, loc->inode, this->name, NULL, + locked_nodes, NULL); + } else { + afr_selfheal_uninodelk(frame, this, loc->inode, this->name, + LLONG_MAX - 1, 0, locked_nodes); + } out: - return ret; + return ret; } void -afr_brick_args_cleanup (void *opaque) +afr_brick_args_cleanup(void *opaque) { - afr_empty_brick_args_t *data = NULL; + afr_empty_brick_args_t *data = NULL; - data = opaque; - loc_wipe (&data->loc); - GF_FREE (data); + data = opaque; + loc_wipe(&data->loc); + GF_FREE(data); } int -_afr_handle_empty_brick_cbk (int ret, call_frame_t *frame, void *opaque) +_afr_handle_empty_brick_cbk(int ret, call_frame_t *frame, void *opaque) { - afr_brick_args_cleanup (opaque); - return 0; + afr_brick_args_cleanup(opaque); + return 0; } int -_afr_handle_empty_brick (void *opaque) +_afr_handle_empty_brick(void *opaque) { + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int empty_index = -1; + int ret = -1; + int op_errno = ENOMEM; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + char *op_type = NULL; + afr_empty_brick_args_t *data = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int empty_index = -1; - int ret = -1; - int op_errno = ENOMEM; - call_frame_t *frame = NULL; - xlator_t *this = NULL; - char *op_type = NULL; - afr_empty_brick_args_t *data = NULL; + data = opaque; + frame = data->frame; + empty_index = data->empty_index; + op_type = data->op_type; + this = frame->this; + priv = this->private; - data = opaque; - frame = data->frame; - empty_index = data->empty_index; - op_type = data->op_type; - this = frame->this; - priv = this->private; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + loc_copy(&local->loc, &data->loc); - loc_copy (&local->loc, &data->loc); + gf_msg(this->name, GF_LOG_INFO, 0, 0, "New brick is : %s", + priv->children[empty_index]->name); - gf_msg 
(this->name, GF_LOG_INFO, 0, 0, "New brick is : %s", - priv->children[empty_index]->name); + ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index, + AFR_METADATA_TRANSACTION, op_type); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } - ret = _afr_handle_empty_brick_type (this, frame, &local->loc, empty_index, - AFR_METADATA_TRANSACTION, op_type); - if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } - - dict_unref (local->xdata_req); - dict_unref (local->xattr_req); - afr_matrix_cleanup (local->pending, priv->child_count); - local->pending = NULL; - local->xattr_req = NULL; - local->xdata_req = NULL; + dict_unref(local->xdata_req); + dict_unref(local->xattr_req); + afr_matrix_cleanup(local->pending, priv->child_count); + local->pending = NULL; + local->xattr_req = NULL; + local->xdata_req = NULL; - ret = _afr_handle_empty_brick_type (this, frame, &local->loc, empty_index, - AFR_ENTRY_TRANSACTION, op_type); - if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } - ret = 0; + ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index, + AFR_ENTRY_TRANSACTION, op_type); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + ret = 0; out: - AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL); - return 0; + AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL); + return 0; +} + +int +afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc, + char *data) +{ + afr_local_t *local = NULL; + int ret = -1; + int op_errno = EINVAL; + + local = frame->local; + local->xdata_req = dict_new(); + + if (!local->xdata_req) { + op_errno = ENOMEM; + goto out; + } + + ret = dict_set_int32(local->xdata_req, "heal-op", + GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + ret = dict_set_str(local->xdata_req, "child-name", data); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + /* set spb choice to -1 whether heal succeeds or not: + * If heal succeeds : spb-choice should be set to -1 as + * it is no longer valid; file is not + * in split-brain anymore. + * If heal doesn't succeed: + * spb-choice should be set to -1 + * otherwise reads will be served + * from spb-choice which is misleading. 
+ */ + ret = afr_inode_split_brain_choice_set(loc->inode, this, -1); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, + "Failed to set" + "split-brain choice to -1"); + afr_heal_splitbrain_file(frame, this, loc); + ret = 0; +out: + if (ret < 0) + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + return 0; } - int -afr_split_brain_resolve_do (call_frame_t *frame, xlator_t *this, loc_t *loc, - char *data) +afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len) { - afr_local_t *local = NULL; - int ret = -1; - int op_errno = EINVAL; + int spb_child_index = -1; + char *spb_child_str = NULL; - local = frame->local; - local->xdata_req = dict_new (); + spb_child_str = alloca0(len + 1); + memcpy(spb_child_str, value, len); - if (!local->xdata_req) { - op_errno = ENOMEM; - goto out; - } + if (!strcmp(spb_child_str, "none")) + return -2; - ret = dict_set_int32 (local->xdata_req, "heal-op", - GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK); - if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } - ret = dict_set_str (local->xdata_req, "child-name", data); - if (ret) { - op_errno = -ret; - ret = -1; - goto out; - } - /* set spb choice to -1 whether heal succeeds or not: - * If heal succeeds : spb-choice should be set to -1 as - * it is no longer valid; file is not - * in split-brain anymore. - * If heal doesn't succeed: - * spb-choice should be set to -1 - * otherwise reads will be served - * from spb-choice which is misleading. - */ - ret = afr_inode_split_brain_choice_set (loc->inode, this, -1); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, "Failed to set" - "split-brain choice to -1"); - afr_heal_splitbrain_file (frame, this, loc); - ret = 0; -out: - if (ret < 0) - AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - return 0; + spb_child_index = afr_get_child_index_from_name(this, spb_child_str); + if (spb_child_index < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL, + "Invalid subvol: %s", spb_child_str); + } + return spb_child_index; } int -afr_get_split_brain_child_index (xlator_t *this, void *value, size_t len) +afr_can_set_split_brain_choice(void *opaque) { - int spb_child_index = -1; - char *spb_child_str = NULL; + afr_spbc_timeout_t *data = opaque; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + int ret = -1; - spb_child_str = alloca0 (len + 1); - memcpy (spb_child_str, value, len); + frame = data->frame; + loc = data->loc; + this = frame->this; - if (!strcmp (spb_child_str, "none")) - return -2; + ret = afr_is_split_brain(frame, this, loc->inode, loc->gfid, &data->d_spb, + &data->m_spb); - spb_child_index = afr_get_child_index_from_name (this, - spb_child_str); - if (spb_child_index < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INVALID_SUBVOL, "Invalid subvol: %s", - spb_child_str); - } - return spb_child_index; + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, + "Failed to determine if %s" + " is in split-brain. 
" + "Aborting split-brain-choice set.", + uuid_utoa(loc->gfid)); + return ret; } int -afr_can_set_split_brain_choice (void *opaque) +afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc, + dict_t *dict) { - afr_spbc_timeout_t *data = opaque; - call_frame_t *frame = NULL; - xlator_t *this = NULL; - loc_t *loc = NULL; - int ret = -1; + void *value = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_spbc_timeout_t *data = NULL; + int len = 0; + int spb_child_index = -1; + int ret = -1; + int op_errno = EINVAL; - frame = data->frame; - loc = data->loc; - this = frame->this; + priv = this->private; - ret = afr_is_split_brain (frame, this, loc->inode, loc->gfid, - &data->d_spb, &data->m_spb); + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) { + ret = 1; + goto out; + } - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, - "Failed to determine if %s" - " is in split-brain. " - "Aborting split-brain-choice set.", - uuid_utoa (loc->gfid)); - return ret; -} + local->op = GF_FOP_SETXATTR; -int -afr_handle_split_brain_commands (xlator_t *this, call_frame_t *frame, - loc_t *loc, dict_t *dict) -{ - void *value = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_spbc_timeout_t *data = NULL; - int len = 0; - int spb_child_index = -1; - int ret = -1; - int op_errno = EINVAL; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) { + ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &value, &len); + if (value) { + spb_child_index = afr_get_split_brain_child_index(this, value, len); + if (spb_child_index < 0) { + /* Case where value was "none" */ + if (spb_child_index == -2) + spb_child_index = -1; + else { ret = 1; + op_errno = EINVAL; goto out; + } } - local->op = GF_FOP_SETXATTR; - - ret = dict_get_ptr_and_len (dict, GF_AFR_SBRAIN_CHOICE, &value, - &len); - if (value) { - spb_child_index = afr_get_split_brain_child_index (this, value, - len); - if (spb_child_index < 0) { - /* Case where value was "none" */ - if (spb_child_index == -2) - spb_child_index = -1; - else { - ret = 1; - op_errno = EINVAL; - goto out; - } - } - - data = GF_CALLOC (1, sizeof (*data), gf_afr_mt_spbc_timeout_t); - if (!data) { - ret = 1; - goto out; - } - data->spb_child_index = spb_child_index; - data->frame = frame; - loc_copy (&local->loc, loc); - data->loc = &local->loc; - ret = synctask_new (this->ctx->env, - afr_can_set_split_brain_choice, - afr_set_split_brain_choice, NULL, data); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, - "Failed to create" - " synctask. Aborting split-brain choice set" - " for %s", loc->name); - ret = 1; - op_errno = ENOMEM; - goto out; - } - ret = 0; - goto out; + data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spbc_timeout_t); + if (!data) { + ret = 1; + goto out; } + data->spb_child_index = spb_child_index; + data->frame = frame; + loc_copy(&local->loc, loc); + data->loc = &local->loc; + ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice, + afr_set_split_brain_choice, NULL, data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, + "Failed to create" + " synctask. 
Aborting split-brain choice set" + " for %s", + loc->name); + ret = 1; + op_errno = ENOMEM; + goto out; + } + ret = 0; + goto out; + } - ret = dict_get_ptr_and_len (dict, GF_AFR_SBRAIN_RESOLVE, &value, &len); - if (value) { - spb_child_index = afr_get_split_brain_child_index (this, value, - len); - if (spb_child_index < 0) { - ret = 1; - goto out; - } - - afr_split_brain_resolve_do (frame, this, loc, - priv->children[spb_child_index]->name); - ret = 0; + ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &value, &len); + if (value) { + spb_child_index = afr_get_split_brain_child_index(this, value, len); + if (spb_child_index < 0) { + ret = 1; + goto out; } + + afr_split_brain_resolve_do(frame, this, loc, + priv->children[spb_child_index]->name); + ret = 0; + } out: - /* key was correct but value was invalid when ret == 1 */ - if (ret == 1) { - AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - if (data) - GF_FREE (data); - ret = 0; - } - return ret; + /* key was correct but value was invalid when ret == 1 */ + if (ret == 1) { + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + if (data) + GF_FREE(data); + ret = 0; + } + return ret; } int -afr_handle_spb_choice_timeout (xlator_t *this, call_frame_t *frame, - dict_t *dict) +afr_handle_spb_choice_timeout(xlator_t *this, call_frame_t *frame, dict_t *dict) { - int ret = -1; - int op_errno = 0; - uint64_t timeout = 0; - afr_private_t *priv = NULL; + int ret = -1; + int op_errno = 0; + uint64_t timeout = 0; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - ret = dict_get_uint64 (dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout); - if (!ret) { - priv->spb_choice_timeout = timeout * 60; - AFR_STACK_UNWIND (setxattr, frame, ret, op_errno, NULL); - } + ret = dict_get_uint64(dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout); + if (!ret) { + priv->spb_choice_timeout = timeout * 60; + AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL); + } - return ret; + return ret; } int -afr_handle_empty_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, - dict_t *dict) +afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc, + dict_t *dict) { - int ret = -1; - int ab_ret = -1; - int empty_index = -1; - int op_errno = EPERM; - char *empty_brick = NULL; - char *op_type = NULL; - afr_empty_brick_args_t *data = NULL; + int ret = -1; + int ab_ret = -1; + int empty_index = -1; + int op_errno = EPERM; + char *empty_brick = NULL; + char *op_type = NULL; + afr_empty_brick_args_t *data = NULL; - ret = dict_get_str (dict, GF_AFR_REPLACE_BRICK, &empty_brick); - if (!ret) - op_type = GF_AFR_REPLACE_BRICK; + ret = dict_get_str(dict, GF_AFR_REPLACE_BRICK, &empty_brick); + if (!ret) + op_type = GF_AFR_REPLACE_BRICK; - ab_ret = dict_get_str (dict, GF_AFR_ADD_BRICK, &empty_brick); - if (!ab_ret) - op_type = GF_AFR_ADD_BRICK; + ab_ret = dict_get_str(dict, GF_AFR_ADD_BRICK, &empty_brick); + if (!ab_ret) + op_type = GF_AFR_ADD_BRICK; - if (ret && ab_ret) - goto out; + if (ret && ab_ret) + goto out; - if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { - gf_msg (this->name, GF_LOG_ERROR, EPERM, - afr_get_msg_id (op_type), - "'%s' is an internal extended attribute.", - op_type); - ret = 1; - goto out; + if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { + gf_msg(this->name, GF_LOG_ERROR, EPERM, afr_get_msg_id(op_type), + "'%s' is an internal extended attribute.", op_type); + ret = 1; + goto out; + } + empty_index = afr_get_child_index_from_name(this, empty_brick); + + if (empty_index < 0) { + /* Didn't belong to this replica 
pair + * Just do a no-op + */ + AFR_STACK_UNWIND(setxattr, frame, 0, 0, NULL); + return 0; + } else { + data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_empty_brick_t); + if (!data) { + ret = 1; + op_errno = ENOMEM; + goto out; } - empty_index = afr_get_child_index_from_name (this, empty_brick); - - if (empty_index < 0) { - /* Didn't belong to this replica pair - * Just do a no-op - */ - AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL); - return 0; - } else { - data = GF_CALLOC (1, sizeof (*data), - gf_afr_mt_empty_brick_t); - if (!data) { - ret = 1; - op_errno = ENOMEM; - goto out; - } - data->frame = frame; - loc_copy (&data->loc, loc); - data->empty_index = empty_index; - data->op_type = op_type; - ret = synctask_new (this->ctx->env, - _afr_handle_empty_brick, - _afr_handle_empty_brick_cbk, - NULL, data); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - afr_get_msg_id (op_type), - "Failed to create synctask."); - ret = 1; - op_errno = ENOMEM; - afr_brick_args_cleanup (data); - goto out; - } + data->frame = frame; + loc_copy(&data->loc, loc); + data->empty_index = empty_index; + data->op_type = op_type; + ret = synctask_new(this->ctx->env, _afr_handle_empty_brick, + _afr_handle_empty_brick_cbk, NULL, data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, afr_get_msg_id(op_type), + "Failed to create synctask."); + ret = 1; + op_errno = ENOMEM; + afr_brick_args_cleanup(data); + goto out; } - ret = 0; + } + ret = 0; out: - if (ret == 1) { - AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - ret = 0; - } - return ret; + if (ret == 1) { + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + ret = 0; + } + return ret; } static int -afr_handle_special_xattr (xlator_t *this, call_frame_t *frame, loc_t *loc, - dict_t *dict) +afr_handle_special_xattr(xlator_t *this, call_frame_t *frame, loc_t *loc, + dict_t *dict) { - int ret = -1; + int ret = -1; - ret = afr_handle_split_brain_commands (this, frame, loc, dict); - if (ret == 0) - goto out; + ret = afr_handle_split_brain_commands(this, frame, loc, dict); + if (ret == 0) + goto out; - ret = afr_handle_spb_choice_timeout (this, frame, dict); - if (ret == 0) - goto out; + ret = afr_handle_spb_choice_timeout(this, frame, dict); + if (ret == 0) + goto out; - /* Applicable for replace-brick and add-brick commands */ - ret = afr_handle_empty_brick (this, frame, loc, dict); + /* Applicable for replace-brick and add-brick commands */ + ret = afr_handle_empty_brick(this, frame, loc, dict); out: - return ret; + return ret; } int -afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = EINVAL; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = EINVAL; - GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict, - op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out); - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict, - op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out); - ret = afr_handle_special_xattr (this, frame, loc, dict); - if (ret == 0) - return 0; + ret = afr_handle_special_xattr(this, frame, loc, dict); + if (ret == 0) + return 0; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if 
(!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.setxattr.dict = dict_ref (dict); - local->cont.setxattr.flags = flags; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + local->cont.setxattr.dict = dict_ref(dict); + local->cont.setxattr.flags = flags; + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->transaction.wind = afr_setxattr_wind; - local->transaction.unwind = afr_setxattr_unwind; + local->transaction.wind = afr_setxattr_wind; + local->transaction.unwind = afr_setxattr_unwind; - loc_copy (&local->loc, loc); - ret = afr_set_inode_local (this, local, loc->inode); - if (ret) - goto out; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - local->op = GF_FOP_SETXATTR; + local->op = GF_FOP_SETXATTR; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); - return 0; + return 0; } /* {{{ fsetxattr */ - int -afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this) +afr_fsetxattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; - - AFR_STACK_UNWIND (fsetxattr, main_frame, local->op_ret, local->op_errno, - local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(fsetxattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; +} int -afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_fsetxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - NULL, NULL, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); } - int -afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_fsetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - 
priv->children[subvol]->fops->fsetxattr, - local->fd, local->cont.fsetxattr.dict, - local->cont.fsetxattr.flags, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_fsetxattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fsetxattr, local->fd, + local->cont.fsetxattr.dict, local->cont.fsetxattr.flags, + local->xdata_req); + return 0; } - int -afr_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata) +afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict, - op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out); - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict, - op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out); - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.fsetxattr.dict = dict_ref (dict); - local->cont.fsetxattr.flags = flags; + local->cont.fsetxattr.dict = dict_ref(dict); + local->cont.fsetxattr.flags = flags; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->transaction.wind = afr_fsetxattr_wind; - local->transaction.unwind = afr_fsetxattr_unwind; + local->transaction.wind = afr_fsetxattr_wind; + local->transaction.unwind = afr_fsetxattr_unwind; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - local->op = GF_FOP_FSETXATTR; + local->op = GF_FOP_FSETXATTR; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - return 0; + AFR_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL); + return 0; } /* }}} */ - /* {{{ removexattr */ - int -afr_removexattr_unwind (call_frame_t *frame, xlator_t *this) +afr_removexattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame 
(frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (removexattr, main_frame, local->op_ret, local->op_errno, - local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(removexattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; +} int -afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_removexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - NULL, NULL, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); } - int -afr_removexattr_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_removexattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->removexattr, - &local->loc, local->cont.removexattr.name, - local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_removexattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->removexattr, &local->loc, + local->cont.removexattr.name, local->xdata_req); + return 0; } - int -afr_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*", - name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out); - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*", - name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out); - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.removexattr.name = gf_strdup (name); + local->cont.removexattr.name = gf_strdup(name); - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->transaction.wind = afr_removexattr_wind; - local->transaction.unwind = afr_removexattr_unwind; + local->transaction.wind = afr_removexattr_wind; + local->transaction.unwind = afr_removexattr_unwind; - loc_copy (&local->loc, loc); - ret = afr_set_inode_local (this, local, loc->inode); - if (ret) - goto out; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - local->op = GF_FOP_REMOVEXATTR; + local->op = 
GF_FOP_REMOVEXATTR; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - return 0; + AFR_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); + return 0; } /* ffremovexattr */ int -afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this) +afr_fremovexattr_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (fremovexattr, main_frame, local->op_ret, local->op_errno, - local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(fremovexattr, main_frame, local->op_ret, local->op_errno, + local->xdata_rsp); + return 0; +} int -afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +afr_fremovexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - NULL, NULL, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, NULL, xdata); } - int -afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_fremovexattr_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->fremovexattr, - local->fd, local->cont.removexattr.name, - local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_fremovexattr_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fremovexattr, local->fd, + local->cont.removexattr.name, local->xdata_req); + return 0; } - int -afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*", - name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out); - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*", - name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out); - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto 
out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.removexattr.name = gf_strdup (name); - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + local->cont.removexattr.name = gf_strdup(name); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->transaction.wind = afr_fremovexattr_wind; - local->transaction.unwind = afr_fremovexattr_unwind; + local->transaction.wind = afr_fremovexattr_wind; + local->transaction.unwind = afr_fremovexattr_unwind; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - local->op = GF_FOP_FREMOVEXATTR; + local->op = GF_FOP_FREMOVEXATTR; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL); + AFR_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL); - return 0; + return 0; } - int -afr_fallocate_unwind (call_frame_t *frame, xlator_t *this) +afr_fallocate_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(fallocate, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_fallocate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); } - int -afr_fallocate_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_fallocate_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t 
*priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->fallocate, - local->fd, local->cont.fallocate.mode, - local->cont.fallocate.offset, - local->cont.fallocate.len, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_fallocate_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fallocate, local->fd, + local->cont.fallocate.mode, local->cont.fallocate.offset, + local->cont.fallocate.len, local->xdata_req); + return 0; } - int -afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, - off_t offset, size_t len, dict_t *xdata) +afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - call_frame_t *transaction_frame = NULL; - afr_local_t *local = NULL; - int ret = -1; - int op_errno = ENOMEM; + call_frame_t *transaction_frame = NULL; + afr_local_t *local = NULL; + int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.fallocate.mode = mode; - local->cont.fallocate.offset = offset; - local->cont.fallocate.len = len; + local->cont.fallocate.mode = mode; + local->cont.fallocate.offset = offset; + local->cont.fallocate.len = len; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->op = GF_FOP_FALLOCATE; + local->op = GF_FOP_FALLOCATE; - local->transaction.wind = afr_fallocate_wind; - local->transaction.unwind = afr_fallocate_unwind; + local->transaction.wind = afr_fallocate_wind; + local->transaction.unwind = afr_fallocate_unwind; - local->transaction.main_frame = frame; + local->transaction.main_frame = frame; - local->transaction.start = local->cont.fallocate.offset; - local->transaction.len = 0; + local->transaction.start = local->cont.fallocate.offset; + local->transaction.len = 0; - afr_fix_open (fd, this); + afr_fix_open(fd, this); - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - /* }}} */ /* {{{ discard */ int -afr_discard_unwind (call_frame_t *frame, xlator_t *this) +afr_discard_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; + 
afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; - - AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_discard_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); } - int -afr_discard_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_discard_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_discard_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->discard, - local->fd, local->cont.discard.offset, - local->cont.discard.len, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_discard_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->discard, local->fd, + local->cont.discard.offset, local->cont.discard.len, + local->xdata_req); + return 0; } - int -afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) +afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.discard.offset = offset; - local->cont.discard.len = len; + local->cont.discard.offset = offset; + local->cont.discard.len = len; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->op = GF_FOP_DISCARD; + local->op = GF_FOP_DISCARD; - local->transaction.wind = afr_discard_wind; - local->transaction.unwind = 
afr_discard_unwind; + local->transaction.wind = afr_discard_wind; + local->transaction.unwind = afr_discard_unwind; - local->transaction.main_frame = frame; + local->transaction.main_frame = frame; - local->transaction.start = local->cont.discard.offset; - local->transaction.len = 0; + local->transaction.start = local->cont.discard.offset; + local->transaction.len = 0; - afr_fix_open (fd, this); + afr_fix_open(fd, this); - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - /* {{{ zerofill */ int -afr_zerofill_unwind (call_frame_t *frame, xlator_t *this) +afr_zerofill_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; -} + AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); + return 0; +} int -afr_zerofill_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); } - int -afr_zerofill_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_zerofill_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->zerofill, - local->fd, local->cont.zerofill.offset, - local->cont.zerofill.len, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_zerofill_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->zerofill, local->fd, + local->cont.zerofill.offset, local->cont.zerofill.len, + local->xdata_req); + return 0; } int -afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - 
transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.zerofill.offset = offset; - local->cont.zerofill.len = len; + local->cont.zerofill.offset = offset; + local->cont.zerofill.len = len; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->op = GF_FOP_ZEROFILL; + local->op = GF_FOP_ZEROFILL; - local->transaction.wind = afr_zerofill_wind; - local->transaction.unwind = afr_zerofill_unwind; + local->transaction.wind = afr_zerofill_wind; + local->transaction.unwind = afr_zerofill_unwind; - local->transaction.main_frame = frame; + local->transaction.main_frame = frame; - local->transaction.start = local->cont.discard.offset; - local->transaction.len = len; + local->transaction.start = local->cont.discard.offset; + local->transaction.len = len; - afr_fix_open (fd, this); + afr_fix_open(fd, this); - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + AFR_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } /* }}} */ int32_t -afr_xattrop_wind_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xattr, dict_t *xdata) +afr_xattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - NULL, NULL, xattr, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, xattr, xdata); } int -afr_xattrop_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_xattrop_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_xattrop_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->xattrop, - &local->loc, local->cont.xattrop.optype, - local->cont.xattrop.xattr, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_xattrop_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->xattrop, &local->loc, + local->cont.xattrop.optype, local->cont.xattrop.xattr, + local->xdata_req); + return 0; } int -afr_xattrop_unwind (call_frame_t *frame, xlator_t *this) +afr_xattrop_unwind(call_frame_t 
*frame, xlator_t *this) { - afr_local_t *local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; - - AFR_STACK_UNWIND (xattrop, main_frame, local->op_ret, local->op_errno, - local->xattr_rsp, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; + + AFR_STACK_UNWIND(xattrop, main_frame, local->op_ret, local->op_errno, + local->xattr_rsp, local->xdata_rsp); + return 0; } int32_t -afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.xattrop.xattr = dict_ref (xattr); - local->cont.xattrop.optype = optype; - if (xdata) - local->xdata_req = dict_ref (xdata); + local->cont.xattrop.xattr = dict_ref(xattr); + local->cont.xattrop.optype = optype; + if (xdata) + local->xdata_req = dict_ref(xdata); - local->transaction.wind = afr_xattrop_wind; - local->transaction.unwind = afr_xattrop_unwind; + local->transaction.wind = afr_xattrop_wind; + local->transaction.unwind = afr_xattrop_unwind; - loc_copy (&local->loc, loc); - ret = afr_set_inode_local (this, local, loc->inode); - if (ret) - goto out; + loc_copy(&local->loc, loc); + ret = afr_set_inode_local(this, local, loc->inode); + if (ret) + goto out; - local->op = GF_FOP_XATTROP; + local->op = GF_FOP_XATTROP; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL); - return 0; + AFR_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL); + return 0; } int32_t -afr_fxattrop_wind_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xattr, dict_t *xdata) +afr_fxattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - NULL, NULL, xattr, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL, + NULL, xattr, xdata); } int -afr_fxattrop_wind (call_frame_t *frame, xlator_t *this, int subvol) 
+afr_fxattrop_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_fxattrop_wind_cbk, (void *) (long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->fxattrop, - local->fd, local->cont.xattrop.optype, - local->cont.xattrop.xattr, local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_fxattrop_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fxattrop, local->fd, + local->cont.xattrop.optype, local->cont.xattrop.xattr, + local->xdata_req); + return 0; } int -afr_fxattrop_unwind (call_frame_t *frame, xlator_t *this) +afr_fxattrop_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + local = frame->local; - AFR_STACK_UNWIND (fxattrop, main_frame, local->op_ret, local->op_errno, - local->xattr_rsp, local->xdata_rsp); + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) return 0; + + AFR_STACK_UNWIND(fxattrop, main_frame, local->op_ret, local->op_errno, + local->xattr_rsp, local->xdata_rsp); + return 0; } int32_t -afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - local->cont.xattrop.xattr = dict_ref (xattr); - local->cont.xattrop.optype = optype; - if (xdata) - local->xdata_req = dict_ref (xdata); + local->cont.xattrop.xattr = dict_ref(xattr); + local->cont.xattrop.optype = optype; + if (xdata) + local->xdata_req = dict_ref(xdata); - local->transaction.wind = afr_fxattrop_wind; - local->transaction.unwind = afr_fxattrop_unwind; + local->transaction.wind = afr_fxattrop_wind; + local->transaction.unwind = afr_fxattrop_unwind; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - local->op = GF_FOP_FXATTROP; + local->op = GF_FOP_FXATTROP; - local->transaction.main_frame = frame; - local->transaction.start = LLONG_MAX - 1; - local->transaction.len = 0; + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; - ret = afr_transaction (transaction_frame, this, - AFR_METADATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if 
(transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL); - return 0; + AFR_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL); + return 0; } - int -afr_fsync_unwind (call_frame_t *frame, xlator_t *this) +afr_fsync_unwind(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *main_frame = NULL; - local = frame->local; + local = frame->local; - main_frame = afr_transaction_detach_fop_frame (frame); - if (!main_frame) - return 0; + main_frame = afr_transaction_detach_fop_frame(frame); + if (!main_frame) + return 0; - AFR_STACK_UNWIND (fsync, main_frame, local->op_ret, local->op_errno, - &local->cont.inode_wfop.prebuf, - &local->cont.inode_wfop.postbuf, local->xdata_rsp); + AFR_STACK_UNWIND(fsync, main_frame, local->op_ret, local->op_errno, + &local->cont.inode_wfop.prebuf, + &local->cont.inode_wfop.postbuf, local->xdata_rsp); - return 0; + return 0; } - int -afr_fsync_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_fsync_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, NULL, xdata); + return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf, + postbuf, NULL, xdata); } - int -afr_fsync_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_fsync_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - STACK_WIND_COOKIE (frame, afr_fsync_wind_cbk, (void *)(long) subvol, - priv->children[subvol], - priv->children[subvol]->fops->fsync, - local->fd, local->cont.fsync.datasync, - local->xdata_req); - return 0; + STACK_WIND_COOKIE(frame, afr_fsync_wind_cbk, (void *)(long)subvol, + priv->children[subvol], + priv->children[subvol]->fops->fsync, local->fd, + local->cont.fsync.datasync, local->xdata_req); + return 0; } int -afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, - dict_t *xdata) +afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { - afr_local_t *local = NULL; - call_frame_t *transaction_frame = NULL; - int ret = -1; - int32_t op_errno = ENOMEM; + afr_local_t *local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int32_t op_errno = ENOMEM; - transaction_frame = copy_frame (frame); - if (!transaction_frame) - goto out; + transaction_frame = copy_frame(frame); + if (!transaction_frame) + goto out; - local = AFR_FRAME_INIT (transaction_frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(transaction_frame, op_errno); + if (!local) + goto out; - if (xdata) - local->xdata_req = dict_copy_with_ref (xdata, NULL); - else - local->xdata_req = dict_new (); + if (xdata) + local->xdata_req = dict_copy_with_ref(xdata, NULL); + else + local->xdata_req = dict_new(); - if (!local->xdata_req) - goto out; + if (!local->xdata_req) + goto out; - local->fd = fd_ref (fd); - ret = afr_set_inode_local (this, local, fd->inode); - if (ret) - goto 
out; + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) + goto out; - local->op = GF_FOP_FSYNC; - local->cont.fsync.datasync = datasync; + local->op = GF_FOP_FSYNC; + local->cont.fsync.datasync = datasync; - if (afr_fd_has_witnessed_unstable_write (this, fd->inode)) { - /* don't care. we only wanted to CLEAR the bit */ - } + if (afr_fd_has_witnessed_unstable_write(this, fd->inode)) { + /* don't care. we only wanted to CLEAR the bit */ + } - local->transaction.wind = afr_fsync_wind; - local->transaction.unwind = afr_fsync_unwind; + local->transaction.wind = afr_fsync_wind; + local->transaction.unwind = afr_fsync_unwind; - local->transaction.main_frame = frame; + local->transaction.main_frame = frame; - ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); - if (ret < 0) { - op_errno = -ret; - goto out; - } + ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION); + if (ret < 0) { + op_errno = -ret; + goto out; + } - return 0; + return 0; out: - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); + if (transaction_frame) + AFR_STACK_DESTROY(transaction_frame); - AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); + AFR_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index d7ff2b01a06..95e52ff4a09 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -18,1122 +18,1075 @@ #include - -#define LOCKED_NO 0x0 /* no lock held */ -#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */ -#define LOCKED_LOWER 0x2 /* for lower path */ +#define LOCKED_NO 0x0 /* no lock held */ +#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */ +#define LOCKED_LOWER 0x2 /* for lower path */ int -afr_entry_lockee_cmp (const void *l1, const void *l2) +afr_entry_lockee_cmp(const void *l1, const void *l2) { - const afr_entry_lockee_t *r1 = l1; - const afr_entry_lockee_t *r2 = l2; - int ret = 0; - uuid_t gfid1 = {0}; - uuid_t gfid2 = {0}; - - loc_gfid ((loc_t*)&r1->loc, gfid1); - loc_gfid ((loc_t*)&r2->loc, gfid2); - ret = gf_uuid_compare (gfid1, gfid2); - /*Entrylks with NULL basename are the 'smallest'*/ - if (ret == 0) { - if (!r1->basename) - return -1; - if (!r2->basename) - return 1; - ret = strcmp (r1->basename, r2->basename); - } - - if (ret <= 0) - return -1; - else - return 1; + const afr_entry_lockee_t *r1 = l1; + const afr_entry_lockee_t *r2 = l2; + int ret = 0; + uuid_t gfid1 = {0}; + uuid_t gfid2 = {0}; + + loc_gfid((loc_t *)&r1->loc, gfid1); + loc_gfid((loc_t *)&r2->loc, gfid2); + ret = gf_uuid_compare(gfid1, gfid2); + /*Entrylks with NULL basename are the 'smallest'*/ + if (ret == 0) { + if (!r1->basename) + return -1; + if (!r2->basename) + return 1; + ret = strcmp(r1->basename, r2->basename); + } + + if (ret <= 0) + return -1; + else + return 1; } -int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index); +int +afr_lock_blocking(call_frame_t *frame, xlator_t *this, int child_index); void -afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner) +afr_set_lk_owner(call_frame_t *frame, xlator_t *this, void *lk_owner) { - gf_msg_trace (this->name, 0, - "Setting lk-owner=%llu", - (unsigned long long) (unsigned long)lk_owner); + gf_msg_trace(this->name, 0, "Setting lk-owner=%llu", + (unsigned long long)(unsigned long)lk_owner); - set_lk_owner_from_ptr 
(&frame->root->lk_owner, lk_owner); + set_lk_owner_from_ptr(&frame->root->lk_owner, lk_owner); } int32_t -internal_lock_count (call_frame_t *frame, xlator_t *this) +internal_lock_count(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int32_t call_count = 0; - int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int32_t call_count = 0; + int i = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) - ++call_count; - } + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) + ++call_count; + } - return call_count; + return call_count; } int afr_is_inodelk_transaction(afr_transaction_type type) { - int ret = 0; + int ret = 0; - switch (type) { + switch (type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: - ret = 1; - break; + ret = 1; + break; case AFR_ENTRY_RENAME_TRANSACTION: case AFR_ENTRY_TRANSACTION: - ret = 0; - break; + ret = 0; + break; + } - } - - return ret; + return ret; } int -afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local, - loc_t *loc, char *basename, int child_count) +afr_init_entry_lockee(afr_entry_lockee_t *lockee, afr_local_t *local, + loc_t *loc, char *basename, int child_count) { - int ret = -1; + int ret = -1; - loc_copy (&lockee->loc, loc); - lockee->basename = (basename)? gf_strdup (basename): NULL; - if (basename && !lockee->basename) - goto out; + loc_copy(&lockee->loc, loc); + lockee->basename = (basename) ? gf_strdup(basename) : NULL; + if (basename && !lockee->basename) + goto out; - lockee->locked_count = 0; - lockee->locked_nodes = GF_CALLOC (child_count, - sizeof (*lockee->locked_nodes), - gf_afr_mt_afr_node_character); + lockee->locked_count = 0; + lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes), + gf_afr_mt_afr_node_character); - if (!lockee->locked_nodes) - goto out; + if (!lockee->locked_nodes) + goto out; - ret = 0; + ret = 0; out: - return ret; - + return ret; } void -afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock) +afr_entry_lockee_cleanup(afr_internal_lock_t *int_lock) { - int i = 0; - - for (i = 0; i < int_lock->lockee_count; i++) { - loc_wipe (&int_lock->lockee[i].loc); - if (int_lock->lockee[i].basename) - GF_FREE (int_lock->lockee[i].basename); - if (int_lock->lockee[i].locked_nodes) - GF_FREE (int_lock->lockee[i].locked_nodes); - } + int i = 0; + + for (i = 0; i < int_lock->lockee_count; i++) { + loc_wipe(&int_lock->lockee[i].loc); + if (int_lock->lockee[i].basename) + GF_FREE(int_lock->lockee[i].basename); + if (int_lock->lockee[i].locked_nodes) + GF_FREE(int_lock->lockee[i].locked_nodes); + } - return; + return; } static int -initialize_entrylk_variables (call_frame_t *frame, xlator_t *this) +initialize_entrylk_variables(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - - int i = 0; - - priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; - - int_lock->entrylk_lock_count = 0; - int_lock->lock_op_ret = -1; - int_lock->lock_op_errno = 0; - - for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) { - if (!int_lock->lockee[i].locked_nodes) - break; - int_lock->lockee[i].locked_count = 0; - memset (int_lock->lockee[i].locked_nodes, 0, - sizeof (*int_lock->lockee[i].locked_nodes) * - priv->child_count); - } + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + 
afr_private_t *priv = NULL; - return 0; + int i = 0; + + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + + int_lock->entrylk_lock_count = 0; + int_lock->lock_op_ret = -1; + int_lock->lock_op_errno = 0; + + for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) { + if (!int_lock->lockee[i].locked_nodes) + break; + int_lock->lockee[i].locked_count = 0; + memset(int_lock->lockee[i].locked_nodes, 0, + sizeof(*int_lock->lockee[i].locked_nodes) * priv->child_count); + } + + return 0; } static int -initialize_inodelk_variables (call_frame_t *frame, xlator_t *this) +initialize_inodelk_variables(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; - priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; - int_lock->lock_count = 0; - int_lock->lk_attempted_count = 0; - int_lock->lock_op_ret = -1; - int_lock->lock_op_errno = 0; + int_lock->lock_count = 0; + int_lock->lk_attempted_count = 0; + int_lock->lock_op_ret = -1; + int_lock->lock_op_errno = 0; - memset (int_lock->locked_nodes, 0, - sizeof (*int_lock->locked_nodes) * priv->child_count); + memset(int_lock->locked_nodes, 0, + sizeof(*int_lock->locked_nodes) * priv->child_count); - return 0; + return 0; } int -afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock) +afr_lockee_locked_nodes_count(afr_internal_lock_t *int_lock) { - int call_count = 0; - int i = 0; + int call_count = 0; + int i = 0; - for (i = 0; i < int_lock->lockee_count; i++) - call_count += int_lock->lockee[i].locked_count; + for (i = 0; i < int_lock->lockee_count; i++) + call_count += int_lock->lockee[i].locked_count; - return call_count; + return call_count; } int -afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) +afr_locked_nodes_count(unsigned char *locked_nodes, int child_count) { - int i = 0; - int call_count = 0; + int i = 0; + int call_count = 0; - for (i = 0; i < child_count; i++) { - if (locked_nodes[i] & LOCKED_YES) - call_count++; - } + for (i = 0; i < child_count; i++) { + if (locked_nodes[i] & LOCKED_YES) + call_count++; + } - return call_count; + return call_count; } /* FIXME: What if UNLOCK fails */ static int32_t -afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - int call_count = 0; - int ret = 0; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + int call_count = 0; + int ret = 0; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1) - ret = afr_write_subvol_reset (frame, this); + if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1) + ret = afr_write_subvol_reset(frame, this); - LOCK (&frame->lock); - { - call_count = --int_lock->lk_call_count; - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + call_count = --int_lock->lk_call_count; + } + UNLOCK(&frame->lock); - if (call_count == 0) { - gf_msg_trace (this->name, 0, - "All internal locks unlocked"); - int_lock->lock_cbk (frame, 
this); - } + if (call_count == 0) { + gf_msg_trace(this->name, 0, "All internal locks unlocked"); + int_lock->lock_cbk(frame, this); + } - return ret; + return ret; } void -afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock, - int32_t child_index) +afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock, + int32_t child_index) { - int_lock->locked_nodes[child_index] &= LOCKED_NO; - + int_lock->locked_nodes[child_index] &= LOCKED_NO; } static int32_t -afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - int32_t child_index = (long)cookie; - afr_private_t *priv = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - - priv = this->private; - - if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - AFR_MSG_UNLOCK_FAIL, - "path=%s gfid=%s: unlock failed on subvolume %s " - "with lock owner %s", local->loc.path, - loc_gfid_utoa (&(local->loc)), - priv->children[child_index]->name, - lkowner_utoa (&frame->root->lk_owner)); - } + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + int32_t child_index = (long)cookie; + afr_private_t *priv = NULL; - afr_update_uninodelk (local, int_lock, child_index); + local = frame->local; + int_lock = &local->internal_lock; - afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata); + priv = this->private; - return 0; + if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL, + "path=%s gfid=%s: unlock failed on subvolume %s " + "with lock owner %s", + local->loc.path, loc_gfid_utoa(&(local->loc)), + priv->children[child_index]->name, + lkowner_utoa(&frame->root->lk_owner)); + } + + afr_update_uninodelk(local, int_lock, child_index); + + afr_unlock_common_cbk(frame, cookie, this, op_ret, op_errno, xdata); + return 0; } static int -afr_unlock_inodelk (call_frame_t *frame, xlator_t *this) +afr_unlock_inodelk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - struct gf_flock flock = {0,}; - int call_count = 0; - int i = 0; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + struct gf_flock flock = { + 0, + }; + int call_count = 0; + int i = 0; - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; - flock.l_start = int_lock->flock.l_start; - flock.l_len = int_lock->flock.l_len; - flock.l_type = F_UNLCK; + flock.l_start = int_lock->flock.l_start; + flock.l_len = int_lock->flock.l_len; + flock.l_type = F_UNLCK; - call_count = afr_locked_nodes_count (int_lock->locked_nodes, - priv->child_count); + call_count = afr_locked_nodes_count(int_lock->locked_nodes, + priv->child_count); - int_lock->lk_call_count = call_count; + int_lock->lk_call_count = call_count; - if (!call_count) { - GF_ASSERT (!local->transaction.do_eager_unlock); - gf_msg_trace (this->name, 0, - "No internal locks unlocked"); + if (!call_count) { + GF_ASSERT(!local->transaction.do_eager_unlock); + gf_msg_trace(this->name, 0, "No internal locks unlocked"); - int_lock->lock_cbk (frame, 
this); - goto out; - } + int_lock->lock_cbk(frame, this); + goto out; + } - for (i = 0; i < priv->child_count; i++) { - if ((int_lock->locked_nodes[i] & LOCKED_YES) != LOCKED_YES) - continue; - - if (local->fd) { - STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, - (void *) (long)i, - priv->children[i], - priv->children[i]->fops->finodelk, - int_lock->domain, local->fd, - F_SETLK, &flock, NULL); - } else { - STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, - (void *) (long)i, - priv->children[i], - priv->children[i]->fops->inodelk, - int_lock->domain, &local->loc, - F_SETLK, &flock, NULL); - } + for (i = 0; i < priv->child_count; i++) { + if ((int_lock->locked_nodes[i] & LOCKED_YES) != LOCKED_YES) + continue; - if (!--call_count) - break; + if (local->fd) { + STACK_WIND_COOKIE( + frame, afr_unlock_inodelk_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->finodelk, + int_lock->domain, local->fd, F_SETLK, &flock, NULL); + } else { + STACK_WIND_COOKIE( + frame, afr_unlock_inodelk_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->inodelk, + int_lock->domain, &local->loc, F_SETLK, &flock, NULL); } + + if (!--call_count) + break; + } out: - return 0; + return 0; } static int32_t -afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_internal_lock_t *int_lock = NULL; - int32_t child_index = 0; - int lockee_no = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_internal_lock_t *int_lock = NULL; + int32_t child_index = 0; + int lockee_no = 0; - priv = this->private; - lockee_no = (int)((long) cookie) / priv->child_count; - child_index = (int) ((long) cookie) % priv->child_count; + priv = this->private; + lockee_no = (int)((long)cookie) / priv->child_count; + child_index = (int)((long)cookie) % priv->child_count; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - AFR_MSG_ENTRY_UNLOCK_FAIL, - "%s: unlock failed on %s", local->loc.path, - priv->children[child_index]->name); - } + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_ENTRY_UNLOCK_FAIL, + "%s: unlock failed on %s", local->loc.path, + priv->children[child_index]->name); + } - int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO; - afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL); + int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO; + afr_unlock_common_cbk(frame, cookie, this, op_ret, op_errno, NULL); - return 0; + return 0; } static int -afr_unlock_entrylk (call_frame_t *frame, xlator_t *this) +afr_unlock_entrylk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int index = 0; - int lockee_no = 0; - int copies = 0; - int i = -1; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - copies = priv->child_count; - - call_count = afr_lockee_locked_nodes_count (int_lock); - - int_lock->lk_call_count = call_count; - - if (!call_count){ - gf_msg_trace (this->name, 0, - "No internal locks unlocked"); - int_lock->lock_cbk (frame, this); - goto out; - } - - for (i = 0; i < 
int_lock->lockee_count * priv->child_count; i++) { - lockee_no = i / copies; - index = i % copies; - if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) { - - STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk, - (void *) (long) i, - priv->children[index], - priv->children[index]->fops->entrylk, - int_lock->domain, - &int_lock->lockee[lockee_no].loc, - int_lock->lockee[lockee_no].basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL); - - if (!--call_count) - break; - } + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int index = 0; + int lockee_no = 0; + int copies = 0; + int i = -1; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + copies = priv->child_count; + + call_count = afr_lockee_locked_nodes_count(int_lock); + + int_lock->lk_call_count = call_count; + + if (!call_count) { + gf_msg_trace(this->name, 0, "No internal locks unlocked"); + int_lock->lock_cbk(frame, this); + goto out; + } + + for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) { + lockee_no = i / copies; + index = i % copies; + if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) { + STACK_WIND_COOKIE( + frame, afr_unlock_entrylk_cbk, (void *)(long)i, + priv->children[index], priv->children[index]->fops->entrylk, + int_lock->domain, &int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, ENTRYLK_UNLOCK, + ENTRYLK_WRLCK, NULL); + + if (!--call_count) + break; } + } out: - return 0; - + return 0; } int32_t -afr_unlock_now (call_frame_t *frame, xlator_t *this) +afr_unlock_now(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = frame->local; + afr_local_t *local = frame->local; - if (afr_is_inodelk_transaction(local->transaction.type)) - afr_unlock_inodelk (frame, this); - else - afr_unlock_entrylk (frame, this); - return 0; + if (afr_is_inodelk_transaction(local->transaction.type)) + afr_unlock_inodelk(frame, this); + else + afr_unlock_entrylk(frame, this); + return 0; } static int32_t -afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int cky = (long) cookie; - int child_index = 0; - int lockee_no = 0; - - priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; - - child_index = ((int)cky) % priv->child_count; - lockee_no = ((int)cky) / priv->child_count; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - gf_msg (this->name, GF_LOG_ERROR, ENOSYS, - AFR_MSG_LOCK_XLATOR_NOT_LOADED, - "subvolume does not support locking. 
" - "please load features/locks xlator on server"); - local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; - } - - local->op_errno = op_errno; - int_lock->lock_op_errno = op_errno; - } - - int_lock->lk_attempted_count++; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int cky = (long)cookie; + int child_index = 0; + int lockee_no = 0; + + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + + child_index = ((int)cky) % priv->child_count; + lockee_no = ((int)cky) / priv->child_count; + + LOCK(&frame->lock); + { + if (op_ret == -1) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_msg(this->name, GF_LOG_ERROR, ENOSYS, + AFR_MSG_LOCK_XLATOR_NOT_LOADED, + "subvolume does not support locking. " + "please load features/locks xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + } + + local->op_errno = op_errno; + int_lock->lock_op_errno = op_errno; } - UNLOCK (&frame->lock); - if ((op_ret == -1) && - (op_errno == ENOSYS)) { - afr_unlock_now (frame, this); - } else { - if (op_ret == 0) { - if (local->transaction.type == AFR_ENTRY_TRANSACTION || - local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { - int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES; - int_lock->lockee[lockee_no].locked_count++; - int_lock->entrylk_lock_count++; - } else { - int_lock->locked_nodes[child_index] |= LOCKED_YES; - int_lock->lock_count++; - - if (local->transaction.type == - AFR_DATA_TRANSACTION) { - LOCK(&local->inode->lock); - { - local->inode_ctx->lock_count++; - } - UNLOCK (&local->inode->lock); - } - } + int_lock->lk_attempted_count++; + } + UNLOCK(&frame->lock); + + if ((op_ret == -1) && (op_errno == ENOSYS)) { + afr_unlock_now(frame, this); + } else { + if (op_ret == 0) { + if (local->transaction.type == AFR_ENTRY_TRANSACTION || + local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + int_lock->lockee[lockee_no] + .locked_nodes[child_index] |= LOCKED_YES; + int_lock->lockee[lockee_no].locked_count++; + int_lock->entrylk_lock_count++; + } else { + int_lock->locked_nodes[child_index] |= LOCKED_YES; + int_lock->lock_count++; + + if (local->transaction.type == AFR_DATA_TRANSACTION) { + LOCK(&local->inode->lock); + { + local->inode_ctx->lock_count++; + } + UNLOCK(&local->inode->lock); } - afr_lock_blocking (frame, this, cky + 1); + } } + afr_lock_blocking(frame, this, cky + 1); + } - return 0; + return 0; } static int32_t -afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); - return 0; - + afr_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata); + return 0; } static int32_t -afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_blocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); - return 0; + afr_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata); + return 0; } static gf_boolean_t -afr_is_entrylk (afr_transaction_type trans_type) +afr_is_entrylk(afr_transaction_type trans_type) { - if (afr_is_inodelk_transaction (trans_type)) - return _gf_false; - return _gf_true; + if 
(afr_is_inodelk_transaction(trans_type)) + return _gf_false; + return _gf_true; } static gf_boolean_t -_is_lock_wind_needed (afr_local_t *local, int child_index) +_is_lock_wind_needed(afr_local_t *local, int child_index) { - if (!local->child_up[child_index]) - return _gf_false; + if (!local->child_up[child_index]) + return _gf_false; - return _gf_true; + return _gf_true; } static void afr_log_entry_locks_failure(xlator_t *this, afr_local_t *local, afr_internal_lock_t *int_lock) { - const char *fop = NULL; - char *pargfid = NULL; - const char *name = NULL; + const char *fop = NULL; + char *pargfid = NULL; + const char *name = NULL; - fop = gf_fop_list[local->op]; + fop = gf_fop_list[local->op]; - switch (local->op) { + switch (local->op) { case GF_FOP_LINK: - pargfid = uuid_utoa(local->newloc.pargfid); - name = local->newloc.name; - break; + pargfid = uuid_utoa(local->newloc.pargfid); + name = local->newloc.name; + break; default: - pargfid = uuid_utoa(local->loc.pargfid); - name = local->loc.name; - break; - } - - gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED, - "Unable to obtain sufficient blocking entry locks on at least " - "one child while attempting %s on {pgfid:%s, name:%s}.", fop, - pargfid, name); + pargfid = uuid_utoa(local->loc.pargfid); + name = local->loc.name; + break; + } + + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED, + "Unable to obtain sufficient blocking entry locks on at least " + "one child while attempting %s on {pgfid:%s, name:%s}.", + fop, pargfid, name); } static gf_boolean_t -is_blocking_locks_count_sufficient (call_frame_t *frame, xlator_t *this) +is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_internal_lock_t *int_lock = NULL; - gf_boolean_t is_entrylk = _gf_false; - int child = 0; - int nlockee = 0; - int lockee_count = 0; - gf_boolean_t ret = _gf_true; - - local = frame->local; - priv = this->private; - int_lock = &local->internal_lock; - lockee_count = int_lock->lockee_count; - is_entrylk = afr_is_entrylk (local->transaction.type); - - if (!is_entrylk) { - if (int_lock->lock_count == 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_BLOCKING_LKS_FAILED, "Unable to obtain " - "blocking inode lock on even one child for " - "gfid:%s.", uuid_utoa (local->inode->gfid)); - return _gf_false; - } else { - /*inodelk succeeded on at least one child. */ - return _gf_true; - } - + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_internal_lock_t *int_lock = NULL; + gf_boolean_t is_entrylk = _gf_false; + int child = 0; + int nlockee = 0; + int lockee_count = 0; + gf_boolean_t ret = _gf_true; + + local = frame->local; + priv = this->private; + int_lock = &local->internal_lock; + lockee_count = int_lock->lockee_count; + is_entrylk = afr_is_entrylk(local->transaction.type); + + if (!is_entrylk) { + if (int_lock->lock_count == 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED, + "Unable to obtain " + "blocking inode lock on even one child for " + "gfid:%s.", + uuid_utoa(local->inode->gfid)); + return _gf_false; } else { - if (int_lock->entrylk_lock_count == 0) { - afr_log_entry_locks_failure (this, local, int_lock); - return _gf_false; - } - /* For FOPS that take multiple sets of locks (mkdir, rename), - * there must be at least one brick on which the locks from - * all lock sets were successful. 
*/ - for (child = 0; child < priv->child_count; child++) { - ret = _gf_true; - for (nlockee = 0; nlockee < lockee_count; nlockee++) { - if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES)) - ret = _gf_false; - } - if (ret) - return ret; - } - if (!ret) - afr_log_entry_locks_failure (this, local, int_lock); + /*inodelk succeeded on at least one child. */ + return _gf_true; } - return ret; + } else { + if (int_lock->entrylk_lock_count == 0) { + afr_log_entry_locks_failure(this, local, int_lock); + return _gf_false; + } + /* For FOPS that take multiple sets of locks (mkdir, rename), + * there must be at least one brick on which the locks from + * all lock sets were successful. */ + for (child = 0; child < priv->child_count; child++) { + ret = _gf_true; + for (nlockee = 0; nlockee < lockee_count; nlockee++) { + if (!(int_lock->lockee[nlockee].locked_nodes[child] & + LOCKED_YES)) + ret = _gf_false; + } + if (ret) + return ret; + } + if (!ret) + afr_log_entry_locks_failure(this, local, int_lock); + } + return ret; } int -afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie) +afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - struct gf_flock flock = {0,}; - uint64_t ctx = 0; - int ret = 0; - int child_index = 0; - int lockee_no = 0; - gf_boolean_t is_entrylk = _gf_false; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - child_index = cookie % priv->child_count; - lockee_no = cookie / priv->child_count; - is_entrylk = afr_is_entrylk (local->transaction.type); - - - if (!is_entrylk) { - flock.l_start = int_lock->flock.l_start; - flock.l_len = int_lock->flock.l_len; - flock.l_type = int_lock->flock.l_type; - } + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + struct gf_flock flock = { + 0, + }; + uint64_t ctx = 0; + int ret = 0; + int child_index = 0; + int lockee_no = 0; + gf_boolean_t is_entrylk = _gf_false; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + child_index = cookie % priv->child_count; + lockee_no = cookie / priv->child_count; + is_entrylk = afr_is_entrylk(local->transaction.type); + + if (!is_entrylk) { + flock.l_start = int_lock->flock.l_start; + flock.l_len = int_lock->flock.l_len; + flock.l_type = int_lock->flock.l_type; + } - if (local->fd) { - ret = fd_ctx_get (local->fd, this, &ctx); + if (local->fd) { + ret = fd_ctx_get(local->fd, this, &ctx); - if (ret < 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_FD_CTX_GET_FAILED, - "unable to get fd ctx for fd=%p", - local->fd); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED, + "unable to get fd ctx for fd=%p", local->fd); - local->op_ret = -1; - int_lock->lock_op_ret = -1; + local->op_ret = -1; + int_lock->lock_op_ret = -1; - afr_unlock_now (frame, this); + afr_unlock_now(frame, this); - return 0; - } + return 0; } + } - if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { - if (!is_blocking_locks_count_sufficient (frame, this)) { - - local->op_ret = -1; - int_lock->lock_op_ret = -1; + if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { + if (!is_blocking_locks_count_sufficient(frame, this)) { + local->op_ret = -1; + int_lock->lock_op_ret = -1; - afr_unlock_now(frame, this); + afr_unlock_now(frame, this); - return 0; - } + return 0; } + } - if (int_lock->lk_expected_count == 
int_lock->lk_attempted_count) { - /* we're done locking */ + if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { + /* we're done locking */ - gf_msg_debug (this->name, 0, - "we're done locking"); + gf_msg_debug(this->name, 0, "we're done locking"); - int_lock->lock_op_ret = 0; - int_lock->lock_cbk (frame, this); - return 0; - } + int_lock->lock_op_ret = 0; + int_lock->lock_cbk(frame, this); + return 0; + } - if (!_is_lock_wind_needed (local, child_index)) { - afr_lock_blocking (frame, this, cookie + 1); - return 0; - } + if (!_is_lock_wind_needed(local, child_index)) { + afr_lock_blocking(frame, this, cookie + 1); + return 0; + } - switch (local->transaction.type) { + switch (local->transaction.type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: - if (local->fd) { - STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->finodelk, - int_lock->domain, local->fd, - F_SETLKW, &flock, NULL); - - } else { - STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->inodelk, - int_lock->domain, &local->loc, - F_SETLKW, &flock, NULL); - } + if (local->fd) { + STACK_WIND_COOKIE( + frame, afr_blocking_inodelk_cbk, (void *)(long)child_index, + priv->children[child_index], + priv->children[child_index]->fops->finodelk, + int_lock->domain, local->fd, F_SETLKW, &flock, NULL); - break; + } else { + STACK_WIND_COOKIE( + frame, afr_blocking_inodelk_cbk, (void *)(long)child_index, + priv->children[child_index], + priv->children[child_index]->fops->inodelk, + int_lock->domain, &local->loc, F_SETLKW, &flock, NULL); + } + + break; case AFR_ENTRY_RENAME_TRANSACTION: case AFR_ENTRY_TRANSACTION: - /*Accounting for child_index increments on 'down' - *and 'fd-less' children */ - - if (local->fd) { - STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, - (void *) (long) cookie, - priv->children[child_index], - priv->children[child_index]->fops->fentrylk, - int_lock->domain, local->fd, - int_lock->lockee[lockee_no].basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); - } else { - STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, - (void *) (long) cookie, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - int_lock->domain, - &int_lock->lockee[lockee_no].loc, - int_lock->lockee[lockee_no].basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); - } - - break; - } - - return 0; + /*Accounting for child_index increments on 'down' + *and 'fd-less' children */ + + if (local->fd) { + STACK_WIND_COOKIE(frame, afr_blocking_entrylk_cbk, + (void *)(long)cookie, + priv->children[child_index], + priv->children[child_index]->fops->fentrylk, + int_lock->domain, local->fd, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + } else { + STACK_WIND_COOKIE( + frame, afr_blocking_entrylk_cbk, (void *)(long)cookie, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + int_lock->domain, &int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, ENTRYLK_LOCK, + ENTRYLK_WRLCK, NULL); + } + + break; + } + + return 0; } int32_t -afr_blocking_lock (call_frame_t *frame, xlator_t *this) +afr_blocking_lock(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int up_count = 0; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = 
NULL; + int up_count = 0; - priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; - switch (local->transaction.type) { + switch (local->transaction.type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: - initialize_inodelk_variables (frame, this); - break; + initialize_inodelk_variables(frame, this); + break; case AFR_ENTRY_RENAME_TRANSACTION: case AFR_ENTRY_TRANSACTION: - up_count = AFR_COUNT (local->child_up, priv->child_count); - int_lock->lk_call_count = int_lock->lk_expected_count - = (int_lock->lockee_count * - up_count); - initialize_entrylk_variables (frame, this); - break; - } + up_count = AFR_COUNT(local->child_up, priv->child_count); + int_lock->lk_call_count = int_lock->lk_expected_count = + (int_lock->lockee_count * up_count); + initialize_entrylk_variables(frame, this); + break; + } - afr_lock_blocking (frame, this, 0); + afr_lock_blocking(frame, this, 0); - return 0; + return 0; } static int32_t -afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_nonblocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - int call_count = 0; - int child_index = (long) cookie; - int copies = 0; - int index = 0; - int lockee_no = 0; - afr_private_t *priv = NULL; - - priv = this->private; - - copies = priv->child_count; - index = child_index % copies; - lockee_no = child_index / copies; - - local = frame->local; - int_lock = &local->internal_lock; - - LOCK (&frame->lock); - { - if (op_ret < 0 ) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - gf_msg (this->name, GF_LOG_ERROR, - ENOSYS, AFR_MSG_LOCK_XLATOR_NOT_LOADED, - "subvolume does not support " - "locking. please load features/locks" - " xlator on server"); - local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; - - int_lock->lock_op_errno = op_errno; - local->op_errno = op_errno; - } - } else if (op_ret == 0) { - int_lock->lockee[lockee_no].locked_nodes[index] |= \ - LOCKED_YES; - int_lock->lockee[lockee_no].locked_count++; - int_lock->entrylk_lock_count++; - } - - call_count = --int_lock->lk_call_count; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + int call_count = 0; + int child_index = (long)cookie; + int copies = 0; + int index = 0; + int lockee_no = 0; + afr_private_t *priv = NULL; + + priv = this->private; + + copies = priv->child_count; + index = child_index % copies; + lockee_no = child_index / copies; + + local = frame->local; + int_lock = &local->internal_lock; + + LOCK(&frame->lock); + { + if (op_ret < 0) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_msg(this->name, GF_LOG_ERROR, ENOSYS, + AFR_MSG_LOCK_XLATOR_NOT_LOADED, + "subvolume does not support " + "locking. please load features/locks" + " xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + } else if (op_ret == 0) { + int_lock->lockee[lockee_no].locked_nodes[index] |= LOCKED_YES; + int_lock->lockee[lockee_no].locked_count++; + int_lock->entrylk_lock_count++; } - UNLOCK (&frame->lock); - - if (call_count == 0) { - gf_msg_trace (this->name, 0, - "Last locking reply received"); - /* all locks successful. 
Proceed to call FOP */ - if (int_lock->entrylk_lock_count == - int_lock->lk_expected_count) { - gf_msg_trace (this->name, 0, - "All servers locked. Calling the cbk"); - int_lock->lock_op_ret = 0; - int_lock->lock_cbk (frame, this); - } - /* Not all locks were successful. Unlock and try locking - again, this time with serially blocking locks */ - else { - gf_msg_trace (this->name, 0, - "%d servers locked. Trying again " - "with blocking calls", - int_lock->lock_count); - - afr_unlock_now(frame, this); - } + + call_count = --int_lock->lk_call_count; + } + UNLOCK(&frame->lock); + + if (call_count == 0) { + gf_msg_trace(this->name, 0, "Last locking reply received"); + /* all locks successful. Proceed to call FOP */ + if (int_lock->entrylk_lock_count == int_lock->lk_expected_count) { + gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk"); + int_lock->lock_op_ret = 0; + int_lock->lock_cbk(frame, this); + } + /* Not all locks were successful. Unlock and try locking + again, this time with serially blocking locks */ + else { + gf_msg_trace(this->name, 0, + "%d servers locked. Trying again " + "with blocking calls", + int_lock->lock_count); + + afr_unlock_now(frame, this); } + } - return 0; + return 0; } int -afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) +afr_nonblocking_entrylk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - int copies = 0; - int index = 0; - int lockee_no = 0; - int32_t call_count = 0; - int i = 0; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - - copies = priv->child_count; - initialize_entrylk_variables (frame, this); + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int copies = 0; + int index = 0; + int lockee_no = 0; + int32_t call_count = 0; + int i = 0; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + copies = priv->child_count; + initialize_entrylk_variables(frame, this); + + if (local->fd) { + fd_ctx = afr_fd_ctx_get(local->fd, this); + if (!fd_ctx) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED, + "unable to get fd ctx for fd=%p", local->fd); + + local->op_ret = -1; + int_lock->lock_op_ret = -1; + local->op_errno = EINVAL; + int_lock->lock_op_errno = EINVAL; + + afr_unlock_now(frame, this); + return -1; + } - if (local->fd) { - fd_ctx = afr_fd_ctx_get (local->fd, this); - if (!fd_ctx) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_FD_CTX_GET_FAILED, - "unable to get fd ctx for fd=%p", - local->fd); - - local->op_ret = -1; - int_lock->lock_op_ret = -1; - local->op_errno = EINVAL; - int_lock->lock_op_errno = EINVAL; - - afr_unlock_now (frame, this); - return -1; - } + call_count = int_lock->lockee_count * internal_lock_count(frame, this); + int_lock->lk_call_count = call_count; + int_lock->lk_expected_count = call_count; - call_count = int_lock->lockee_count * internal_lock_count (frame, this); - int_lock->lk_call_count = call_count; - int_lock->lk_expected_count = call_count; + if (!call_count) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON, + "fd not open on any subvolumes. aborting."); + afr_unlock_now(frame, this); + goto out; + } - if (!call_count) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_INFO_COMMON, - "fd not open on any subvolumes. 
aborting."); - afr_unlock_now (frame, this); - goto out; - } + /* Send non-blocking entrylk calls only on up children + and where the fd has been opened */ + for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) { + index = i % copies; + lockee_no = i / copies; + if (local->child_up[index]) { + STACK_WIND_COOKIE(frame, afr_nonblocking_entrylk_cbk, + (void *)(long)i, priv->children[index], + priv->children[index]->fops->fentrylk, + this->name, local->fd, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + if (!--call_count) + break; + } + } + } else { + call_count = int_lock->lockee_count * internal_lock_count(frame, this); + int_lock->lk_call_count = call_count; + int_lock->lk_expected_count = call_count; - /* Send non-blocking entrylk calls only on up children - and where the fd has been opened */ - for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) { - index = i%copies; - lockee_no = i/copies; - if (local->child_up[index]) { - STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, - (void *) (long) i, - priv->children[index], - priv->children[index]->fops->fentrylk, - this->name, local->fd, - int_lock->lockee[lockee_no].basename, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, - NULL); - if (!--call_count) - break; - } - } - } else { - call_count = int_lock->lockee_count * internal_lock_count (frame, this); - int_lock->lk_call_count = call_count; - int_lock->lk_expected_count = call_count; - - for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) { - index = i%copies; - lockee_no = i/copies; - if (local->child_up[index]) { - STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, - (void *) (long) i, - priv->children[index], - priv->children[index]->fops->entrylk, - this->name, &int_lock->lockee[lockee_no].loc, - int_lock->lockee[lockee_no].basename, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, - NULL); - - if (!--call_count) - break; - } - } + for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) { + index = i % copies; + lockee_no = i / copies; + if (local->child_up[index]) { + STACK_WIND_COOKIE(frame, afr_nonblocking_entrylk_cbk, + (void *)(long)i, priv->children[index], + priv->children[index]->fops->entrylk, + this->name, &int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + + if (!--call_count) + break; + } } + } out: - return 0; + return 0; } int32_t -afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - int call_count = 0; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + int call_count = 0; + int child_index = (long)cookie; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - if (op_ret == 0 && local->transaction.type == AFR_DATA_TRANSACTION) { - LOCK (&local->inode->lock); - { - local->inode_ctx->lock_count++; - } - UNLOCK (&local->inode->lock); + if (op_ret == 0 && local->transaction.type == AFR_DATA_TRANSACTION) { + LOCK(&local->inode->lock); + { + local->inode_ctx->lock_count++; } + UNLOCK(&local->inode->lock); + } - LOCK (&frame->lock); - { - if (op_ret < 0) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - gf_msg (this->name, GF_LOG_ERROR, ENOSYS, - 
AFR_MSG_LOCK_XLATOR_NOT_LOADED, - "subvolume does not support " - "locking. please load features/locks" - " xlator on server"); - local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; - int_lock->lock_op_errno = op_errno; - local->op_errno = op_errno; - } - } else { - int_lock->locked_nodes[child_index] |= LOCKED_YES; - int_lock->lock_count++; - } - - call_count = --int_lock->lk_call_count; + LOCK(&frame->lock); + { + if (op_ret < 0) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_msg(this->name, GF_LOG_ERROR, ENOSYS, + AFR_MSG_LOCK_XLATOR_NOT_LOADED, + "subvolume does not support " + "locking. please load features/locks" + " xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + } else { + int_lock->locked_nodes[child_index] |= LOCKED_YES; + int_lock->lock_count++; } - UNLOCK (&frame->lock); - - if (call_count == 0) { - gf_msg_trace (this->name, 0, - "Last inode locking reply received"); - /* all locks successful. Proceed to call FOP */ - if (int_lock->lock_count == int_lock->lk_expected_count) { - gf_msg_trace (this->name, 0, - "All servers locked. Calling the cbk"); - int_lock->lock_op_ret = 0; - int_lock->lock_cbk (frame, this); - } - /* Not all locks were successful. Unlock and try locking - again, this time with serially blocking locks */ - else { - gf_msg_trace (this->name, 0, - "%d servers locked. " - "Trying again with blocking calls", - int_lock->lock_count); - - afr_unlock_now(frame, this); - } + + call_count = --int_lock->lk_call_count; + } + UNLOCK(&frame->lock); + + if (call_count == 0) { + gf_msg_trace(this->name, 0, "Last inode locking reply received"); + /* all locks successful. Proceed to call FOP */ + if (int_lock->lock_count == int_lock->lk_expected_count) { + gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk"); + int_lock->lock_op_ret = 0; + int_lock->lock_cbk(frame, this); + } + /* Not all locks were successful. Unlock and try locking + again, this time with serially blocking locks */ + else { + gf_msg_trace(this->name, 0, + "%d servers locked. 
" + "Trying again with blocking calls", + int_lock->lock_count); + + afr_unlock_now(frame, this); } + } - return 0; + return 0; } int -afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) +afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - int32_t call_count = 0; - int i = 0; - int ret = 0; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - - initialize_inodelk_variables (frame, this); - - if (local->fd) { - fd_ctx = afr_fd_ctx_get (local->fd, this); - if (!fd_ctx) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_FD_CTX_GET_FAILED, - "unable to get fd ctx for fd=%p", - local->fd); - - local->op_ret = -1; - int_lock->lock_op_ret = -1; - local->op_errno = EINVAL; - int_lock->lock_op_errno = EINVAL; - - afr_unlock_now (frame, this); - ret = -1; - goto out; - } + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int32_t call_count = 0; + int i = 0; + int ret = 0; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + initialize_inodelk_variables(frame, this); + + if (local->fd) { + fd_ctx = afr_fd_ctx_get(local->fd, this); + if (!fd_ctx) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED, + "unable to get fd ctx for fd=%p", local->fd); + + local->op_ret = -1; + int_lock->lock_op_ret = -1; + local->op_errno = EINVAL; + int_lock->lock_op_errno = EINVAL; + + afr_unlock_now(frame, this); + ret = -1; + goto out; } + } - call_count = internal_lock_count (frame, this); - int_lock->lk_call_count = call_count; - int_lock->lk_expected_count = call_count; + call_count = internal_lock_count(frame, this); + int_lock->lk_call_count = call_count; + int_lock->lk_expected_count = call_count; - if (!call_count) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_SUBVOLS_DOWN, - "All bricks are down, aborting."); - afr_unlock_now (frame, this); - goto out; - } + if (!call_count) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN, + "All bricks are down, aborting."); + afr_unlock_now(frame, this); + goto out; + } - /* Send non-blocking inodelk calls only on up children - and where the fd has been opened */ - for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i]) - continue; - - if (local->fd) { - STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->finodelk, - int_lock->domain, local->fd, - F_SETLK, &int_lock->flock, NULL); - } else { - - STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - int_lock->domain, &local->loc, - F_SETLK, &int_lock->flock, NULL); - } - if (!--call_count) - break; + /* Send non-blocking inodelk calls only on up children + and where the fd has been opened */ + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (local->fd) { + STACK_WIND_COOKIE( + frame, afr_nonblocking_inodelk_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->finodelk, + int_lock->domain, local->fd, F_SETLK, &int_lock->flock, NULL); + } else { + STACK_WIND_COOKIE( + frame, afr_nonblocking_inodelk_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->inodelk, + int_lock->domain, &local->loc, F_SETLK, &int_lock->flock, NULL); } + if (!--call_count) + break; + } out: - 
return ret; + return ret; } int32_t -afr_unlock (call_frame_t *frame, xlator_t *this) +afr_unlock(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_lock_t *lock = NULL; - - local = frame->local; - - if (!local->transaction.eager_lock_on) - goto out; - lock = &local->inode_ctx->lock[local->transaction.type]; - LOCK (&local->inode->lock); - { - list_del_init (&local->transaction.owner_list); - if (list_empty (&lock->owners) && list_empty (&lock->post_op)) { - local->transaction.do_eager_unlock = _gf_true; - /*TODO: Need to get metadata use on_disk and inherit/uninherit - *GF_ASSERT (!local->inode_ctx->on_disk[local->transaction.type]); - *GF_ASSERT (!local->inode_ctx->inherited[local->transaction.type]); - */ - GF_ASSERT (lock->release); - } - } - UNLOCK (&local->inode->lock); - if (!local->transaction.do_eager_unlock) { - local->internal_lock.lock_cbk (frame, this); - return 0; + afr_local_t *local = NULL; + afr_lock_t *lock = NULL; + + local = frame->local; + + if (!local->transaction.eager_lock_on) + goto out; + lock = &local->inode_ctx->lock[local->transaction.type]; + LOCK(&local->inode->lock); + { + list_del_init(&local->transaction.owner_list); + if (list_empty(&lock->owners) && list_empty(&lock->post_op)) { + local->transaction.do_eager_unlock = _gf_true; + /*TODO: Need to get metadata use on_disk and inherit/uninherit + *GF_ASSERT (!local->inode_ctx->on_disk[local->transaction.type]); + *GF_ASSERT (!local->inode_ctx->inherited[local->transaction.type]); + */ + GF_ASSERT(lock->release); } + } + UNLOCK(&local->inode->lock); + if (!local->transaction.do_eager_unlock) { + local->internal_lock.lock_cbk(frame, this); + return 0; + } out: - afr_unlock_now (frame, this); - return 0; + afr_unlock_now(frame, this); + return 0; } diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 3057deed604..a202d528ec8 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -36,336 +36,318 @@ #include "afr-dir-write.h" #include "afr-transaction.h" - gf_boolean_t -afr_is_fd_fixable (fd_t *fd) +afr_is_fd_fixable(fd_t *fd) { - if (!fd || !fd->inode) - return _gf_false; - else if (fd_is_anonymous (fd)) - return _gf_false; - else if (gf_uuid_is_null (fd->inode->gfid)) - return _gf_false; - - return _gf_true; + if (!fd || !fd->inode) + return _gf_false; + else if (fd_is_anonymous(fd)) + return _gf_false; + else if (gf_uuid_is_null(fd->inode->gfid)) + return _gf_false; + + return _gf_true; } - int -afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +afr_open_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - afr_local_t * local = frame->local; + afr_local_t *local = frame->local; - AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, - local->fd, xdata); - return 0; + AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd, + xdata); + return 0; } int -afr_open_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - fd_t *fd, dict_t *xdata) +afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; - int child_index = (long) cookie; - afr_fd_ctx_t *fd_ctx = NULL; - - local = frame->local; - fd_ctx = 
local->fd_ctx; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; - } else { - local->op_ret = op_ret; - fd_ctx->opened_on[child_index] = AFR_FD_OPENED; - if (!local->xdata_rsp && xdata) - local->xdata_rsp = dict_ref (xdata); - } + afr_local_t *local = NULL; + int call_count = -1; + int child_index = (long)cookie; + afr_fd_ctx_t *fd_ctx = NULL; + + local = frame->local; + fd_ctx = local->fd_ctx; + + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; + } else { + local->op_ret = op_ret; + fd_ctx->opened_on[child_index] = AFR_FD_OPENED; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref(xdata); } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) { - STACK_WIND (frame, afr_open_ftruncate_cbk, - this, this->fops->ftruncate, - fd, 0, NULL); - } else { - AFR_STACK_UNWIND (open, frame, local->op_ret, - local->op_errno, local->cont.open.fd, - local->xdata_rsp); - } + } + UNLOCK(&frame->lock); + + call_count = afr_frame_return(frame); + + if (call_count == 0) { + if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) { + STACK_WIND(frame, afr_open_ftruncate_cbk, this, + this->fops->ftruncate, fd, 0, NULL); + } else { + AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, + local->cont.open.fd, local->xdata_rsp); } + } - return 0; + return 0; } - int -afr_open_continue (call_frame_t *frame, xlator_t *this, int err) +afr_open_continue(call_frame_t *frame, xlator_t *this, int err) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int i = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - if (err) { - AFR_STACK_UNWIND (open, frame, -1, err, NULL, NULL); - } else { - local->call_count = AFR_COUNT (local->child_up, - priv->child_count); - call_count = local->call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_open_cbk, - (void *)(long)i, - priv->children[i], - priv->children[i]->fops->open, - &local->loc, - (local->cont.open.flags & ~O_TRUNC), - local->cont.open.fd, - local->xdata_req); - if (!--call_count) - break; - } - } + if (err) { + AFR_STACK_UNWIND(open, frame, -1, err, NULL, NULL); + } else { + local->call_count = AFR_COUNT(local->child_up, priv->child_count); + call_count = local->call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE(frame, afr_open_cbk, (void *)(long)i, + priv->children[i], + priv->children[i]->fops->open, &local->loc, + (local->cont.open.flags & ~O_TRUNC), + local->cont.open.fd, local->xdata_req); + if (!--call_count) + break; + } } - return 0; + } + return 0; } int -afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int spb_choice = 0; - int event_generation = 0; - int ret = 0; - int32_t op_errno = 0; - afr_fd_ctx_t *fd_ctx = NULL; - - //We can't let truncation to happen outside transaction. 
- - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_OPEN; - fd_ctx = afr_fd_ctx_get (fd, this); - if (!fd_ctx) { - op_errno = ENOMEM; - goto out; - } - - if (!afr_is_consistent_io_possible (local, priv, &op_errno)) - goto out; - - local->inode = inode_ref (loc->inode); - loc_copy (&local->loc, loc); - local->fd_ctx = fd_ctx; - fd_ctx->flags = flags; - if (xdata) - local->xdata_req = dict_ref (xdata); - - local->cont.open.flags = flags; - local->cont.open.fd = fd_ref (fd); - - ret = afr_inode_get_readable (frame, local->inode, this, - NULL, &event_generation, - AFR_DATA_TRANSACTION); - if ((ret < 0) && - (afr_inode_split_brain_choice_get (local->inode, - this, &spb_choice) == 0) && - spb_choice < 0) { - afr_inode_refresh (frame, this, local->inode, - local->inode->gfid, afr_open_continue); - } else { - afr_open_continue (frame, this, 0); - } - - return 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int spb_choice = 0; + int event_generation = 0; + int ret = 0; + int32_t op_errno = 0; + afr_fd_ctx_t *fd_ctx = NULL; + + // We can't let truncation to happen outside transaction. + + priv = this->private; + + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; + + local->op = GF_FOP_OPEN; + fd_ctx = afr_fd_ctx_get(fd, this); + if (!fd_ctx) { + op_errno = ENOMEM; + goto out; + } + + if (!afr_is_consistent_io_possible(local, priv, &op_errno)) + goto out; + + local->inode = inode_ref(loc->inode); + loc_copy(&local->loc, loc); + local->fd_ctx = fd_ctx; + fd_ctx->flags = flags; + if (xdata) + local->xdata_req = dict_ref(xdata); + + local->cont.open.flags = flags; + local->cont.open.fd = fd_ref(fd); + + ret = afr_inode_get_readable(frame, local->inode, this, NULL, + &event_generation, AFR_DATA_TRANSACTION); + if ((ret < 0) && + (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) == + 0) && + spb_choice < 0) { + afr_inode_refresh(frame, this, local->inode, local->inode->gfid, + afr_open_continue); + } else { + afr_open_continue(frame, this, 0); + } + + return 0; out: - AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, NULL); + AFR_STACK_UNWIND(open, frame, -1, op_errno, fd, NULL); - return 0; + return 0; } int -afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - dict_t *xdata) +afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - int call_count = 0; - int child_index = (long) cookie; - - priv = this->private; - local = frame->local; - + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int call_count = 0; + int child_index = (long)cookie; + + priv = this->private; + local = frame->local; + + if (op_ret >= 0) { + gf_msg_debug(this->name, 0, + "fd for %s opened " + "successfully on subvolume %s", + local->loc.path, priv->children[child_index]->name); + } else { + gf_msg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno, + AFR_MSG_OPEN_FAIL, + "Failed to open %s on " + "subvolume %s", + local->loc.path, priv->children[child_index]->name); + } + + fd_ctx = local->fd_ctx; + + LOCK(&local->fd->lock); + { if (op_ret >= 0) { - gf_msg_debug (this->name, 0, "fd for %s opened " - "successfully on subvolume %s", local->loc.path, - priv->children[child_index]->name); + fd_ctx->opened_on[child_index] = 
AFR_FD_OPENED; } else { - gf_msg (this->name, fop_log_level (GF_FOP_OPEN, op_errno), - op_errno, AFR_MSG_OPEN_FAIL, "Failed to open %s on " - "subvolume %s", local->loc.path, - priv->children[child_index]->name); + fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; } + } + UNLOCK(&local->fd->lock); - fd_ctx = local->fd_ctx; + call_count = afr_frame_return(frame); + if (call_count == 0) + AFR_STACK_DESTROY(frame); - LOCK (&local->fd->lock); - { - if (op_ret >= 0) { - fd_ctx->opened_on[child_index] = AFR_FD_OPENED; - } else { - fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; - } - } - UNLOCK (&local->fd->lock); + return 0; +} - call_count = afr_frame_return (frame); - if (call_count == 0) - AFR_STACK_DESTROY (frame); +static int +afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) +{ + afr_fd_ctx_t *fd_ctx = NULL; + afr_private_t *priv = NULL; + int i = 0; + int count = 0; + priv = this->private; + + fd_ctx = afr_fd_ctx_get(fd, this); + if (!fd_ctx) return 0; -} + LOCK(&fd->lock); + { + for (i = 0; i < priv->child_count; i++) { + if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED && + priv->child_up[i]) { + fd_ctx->opened_on[i] = AFR_FD_OPENING; + need_open[i] = 1; + count++; + } else { + need_open[i] = 0; + } + } + } + UNLOCK(&fd->lock); -static int -afr_fd_ctx_need_open (fd_t *fd, xlator_t *this, unsigned char *need_open) -{ - afr_fd_ctx_t *fd_ctx = NULL; - afr_private_t *priv = NULL; - int i = 0; - int count = 0; - - priv = this->private; - - fd_ctx = afr_fd_ctx_get (fd, this); - if (!fd_ctx) - return 0; - - LOCK (&fd->lock); - { - for (i = 0; i < priv->child_count; i++) { - if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED && - priv->child_up[i]) { - fd_ctx->opened_on[i] = AFR_FD_OPENING; - need_open[i] = 1; - count++; - } else { - need_open[i] = 0; - } - } - } - UNLOCK (&fd->lock); - - return count; + return count; } - void -afr_fix_open (fd_t *fd, xlator_t *this) +afr_fix_open(fd_t *fd, xlator_t *this) { - afr_private_t *priv = NULL; - int i = 0; - call_frame_t *frame = NULL; - afr_local_t *local = NULL; - int ret = -1; - int32_t op_errno = 0; - afr_fd_ctx_t *fd_ctx = NULL; - unsigned char *need_open = NULL; - int call_count = 0; + afr_private_t *priv = NULL; + int i = 0; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = -1; + int32_t op_errno = 0; + afr_fd_ctx_t *fd_ctx = NULL; + unsigned char *need_open = NULL; + int call_count = 0; - priv = this->private; + priv = this->private; - if (!afr_is_fd_fixable (fd)) - goto out; + if (!afr_is_fd_fixable(fd)) + goto out; - fd_ctx = afr_fd_ctx_get (fd, this); - if (!fd_ctx) - goto out; + fd_ctx = afr_fd_ctx_get(fd, this); + if (!fd_ctx) + goto out; - need_open = alloca0 (priv->child_count); + need_open = alloca0(priv->child_count); - call_count = afr_fd_ctx_need_open (fd, this, need_open); - if (!call_count) - goto out; + call_count = afr_fd_ctx_need_open(fd, this, need_open); + if (!call_count) + goto out; - frame = create_frame (this, this->ctx->pool); - if (!frame) - goto out; + frame = create_frame(this, this->ctx->pool); + if (!frame) + goto out; - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; + local = AFR_FRAME_INIT(frame, op_errno); + if (!local) + goto out; - local->loc.inode = inode_ref (fd->inode); - ret = loc_path (&local->loc, NULL); - if (ret < 0) - goto out; + local->loc.inode = inode_ref(fd->inode); + ret = loc_path(&local->loc, NULL); + if (ret < 0) + goto out; - local->fd = fd_ref (fd); - local->fd_ctx = fd_ctx; + local->fd = fd_ref(fd); + local->fd_ctx = fd_ctx; - 
local->call_count = call_count; + local->call_count = call_count; - gf_msg_debug (this->name, 0, "need open count: %d", - call_count); + gf_msg_debug(this->name, 0, "need open count: %d", call_count); - for (i = 0; i < priv->child_count; i++) { - if (!need_open[i]) - continue; - - if (IA_IFDIR == fd->inode->ia_type) { - gf_msg_debug (this->name, 0, - "opening fd for dir %s on subvolume %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk, - (void*) (long) i, - priv->children[i], - priv->children[i]->fops->opendir, - &local->loc, local->fd, - NULL); - } else { - gf_msg_debug (this->name, 0, - "opening fd for file %s on subvolume %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk, - (void *)(long) i, - priv->children[i], - priv->children[i]->fops->open, - &local->loc, - fd_ctx->flags & (~O_TRUNC), - local->fd, NULL); - } - - if (!--call_count) - break; + for (i = 0; i < priv->child_count; i++) { + if (!need_open[i]) + continue; + + if (IA_IFDIR == fd->inode->ia_type) { + gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, + priv->children[i], + priv->children[i]->fops->opendir, &local->loc, + local->fd, NULL); + } else { + gf_msg_debug(this->name, 0, + "opening fd for file %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->open, + &local->loc, fd_ctx->flags & (~O_TRUNC), + local->fd, NULL); } - return; + if (!--call_count) + break; + } + + return; out: - if (frame) - AFR_STACK_DESTROY (frame); + if (frame) + AFR_STACK_DESTROY(frame); } diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 945c050da03..1df39c35fce 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -13,347 +13,343 @@ #include "afr-messages.h" void -afr_pending_read_increment (afr_private_t *priv, int child_index) +afr_pending_read_increment(afr_private_t *priv, int child_index) { - if (child_index < 0 || child_index > priv->child_count) - return; + if (child_index < 0 || child_index > priv->child_count) + return; - GF_ATOMIC_INC(priv->pending_reads[child_index]); + GF_ATOMIC_INC(priv->pending_reads[child_index]); } void -afr_pending_read_decrement (afr_private_t *priv, int child_index) +afr_pending_read_decrement(afr_private_t *priv, int child_index) { - if (child_index < 0 || child_index > priv->child_count) - return; + if (child_index < 0 || child_index > priv->child_count) + return; - GF_ATOMIC_DEC(priv->pending_reads[child_index]); + GF_ATOMIC_DEC(priv->pending_reads[child_index]); } static gf_boolean_t -afr_ta_dict_contains_pending_xattr (dict_t *dict, afr_private_t *priv, - int child) +afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child) { - int *pending = NULL; - int ret = 0; - int i = 0; - - ret = dict_get_ptr (dict, priv->pending_key[child], (void *)&pending); - if (ret == 0) { - for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) { - /* Not doing a ntoh32(pending) as we just want to check - * if it is non-zero or not. 
*/ - if (pending[i]) { - return _gf_true; - } - } + int *pending = NULL; + int ret = 0; + int i = 0; + + ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending); + if (ret == 0) { + for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) { + /* Not doing a ntoh32(pending) as we just want to check + * if it is non-zero or not. */ + if (pending[i]) { + return _gf_true; + } } + } - return _gf_false; + return _gf_false; } void -afr_read_txn_wind (call_frame_t *frame, xlator_t *this, int subvol) +afr_read_txn_wind(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - afr_pending_read_decrement (priv, local->read_subvol); - local->read_subvol = subvol; - afr_pending_read_increment (priv, subvol); - local->readfn (frame, this, subvol); + afr_pending_read_decrement(priv, local->read_subvol); + local->read_subvol = subvol; + afr_pending_read_increment(priv, subvol); + local->readfn(frame, this, subvol); } int -afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this) +afr_read_txn_next_subvol(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int subvol = -1; - - local = frame->local; - priv = this->private; - - - for (i = 0; i < priv->child_count; i++) { - if (!local->readable[i]) { - /* don't even bother trying here. - just mark as attempted and move on. */ - local->read_attempted[i] = 1; - continue; - } - - if (!local->read_attempted[i]) { - subvol = i; - break; - } - } - - /* If no more subvols were available for reading, we leave - @subvol as -1, which is an indication we have run out of - readable subvols. */ - if (subvol != -1) - local->read_attempted[subvol] = 1; - afr_read_txn_wind (frame, this, subvol); - - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + int subvol = -1; + + local = frame->local; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (!local->readable[i]) { + /* don't even bother trying here. + just mark as attempted and move on. */ + local->read_attempted[i] = 1; + continue; + } + + if (!local->read_attempted[i]) { + subvol = i; + break; + } + } + + /* If no more subvols were available for reading, we leave + @subvol as -1, which is an indication we have run out of + readable subvols. 
*/ + if (subvol != -1) + local->read_attempted[subvol] = 1; + afr_read_txn_wind(frame, this, subvol); + + return 0; } static int -afr_ta_read_txn_done (int ret, call_frame_t *ta_frame, void *opaque) +afr_ta_read_txn_done(int ret, call_frame_t *ta_frame, void *opaque) { - STACK_DESTROY(ta_frame->root); - return 0; + STACK_DESTROY(ta_frame->root); + return 0; } static int -afr_ta_read_txn (void *opaque) +afr_ta_read_txn(void *opaque) { - call_frame_t *frame = NULL; - xlator_t *this = NULL; - int read_subvol = -1; - int up_child = AFR_CHILD_UNKNOWN; - int possible_bad_child = AFR_CHILD_UNKNOWN; - int ret = 0; - int op_errno = ENOMEM; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - struct gf_flock flock = {0, }; - dict_t *xdata_req = NULL; - dict_t *xdata_rsp = NULL; - int **pending = NULL; - loc_t loc = {0,}; - - frame = (call_frame_t *)opaque; - this = frame->this; - local = frame->local; - priv = this->private; - - if (local->child_up[AFR_CHILD_ZERO]) { - up_child = AFR_CHILD_ZERO; - possible_bad_child = AFR_CHILD_ONE; - } else if (local->child_up[AFR_CHILD_ONE]) { - up_child = AFR_CHILD_ONE; - possible_bad_child = AFR_CHILD_ZERO; - } - - GF_ASSERT (up_child != AFR_CHILD_UNKNOWN); - - /* Query the up_child to see if it blames the down one. */ - xdata_req = dict_new(); - if (!xdata_req) - goto out; - - pending = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS); - if (!pending) - goto out; - - ret = afr_set_pending_dict (priv, xdata_req, pending); - if (ret < 0) - goto out; - - if (local->fd) { - ret = syncop_fxattrop (priv->children[up_child], local->fd, - GF_XATTROP_ADD_ARRAY, xdata_req, NULL, - &xdata_rsp, NULL); - } else { - ret = syncop_xattrop (priv->children[up_child], &local->loc, - GF_XATTROP_ADD_ARRAY, xdata_req, NULL, - &xdata_rsp, NULL); - } - if (ret || !xdata_rsp) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed xattrop for gfid %s on %s", - uuid_utoa (local->inode->gfid), - priv->children[up_child]->name); - op_errno = -ret; - goto out; - } - - if (afr_ta_dict_contains_pending_xattr (xdata_rsp, priv, - possible_bad_child)) { - read_subvol = up_child; - goto out; - } - dict_unref (xdata_rsp); - /* Query thin-arbiter to see if it blames any data brick. */ - ret = afr_fill_ta_loc (this, &loc); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to populate thin-arbiter loc for: %s.", - loc.name); - goto out; - } - flock.l_type = F_WRLCK;/*start and length are already zero. 
*/ - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_MODIFY, &loc, F_SETLKW, &flock, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "gfid:%s: Failed to get AFR_TA_DOM_MODIFY lock on %s.", - uuid_utoa (local->inode->gfid), - priv->pending_key[THIN_ARBITER_BRICK_INDEX]); - op_errno = -ret; - goto out; - } - - ret = syncop_xattrop (priv->children[THIN_ARBITER_BRICK_INDEX], &loc, + call_frame_t *frame = NULL; + xlator_t *this = NULL; + int read_subvol = -1; + int up_child = AFR_CHILD_UNKNOWN; + int possible_bad_child = AFR_CHILD_UNKNOWN; + int ret = 0; + int op_errno = ENOMEM; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + struct gf_flock flock = { + 0, + }; + dict_t *xdata_req = NULL; + dict_t *xdata_rsp = NULL; + int **pending = NULL; + loc_t loc = { + 0, + }; + + frame = (call_frame_t *)opaque; + this = frame->this; + local = frame->local; + priv = this->private; + + if (local->child_up[AFR_CHILD_ZERO]) { + up_child = AFR_CHILD_ZERO; + possible_bad_child = AFR_CHILD_ONE; + } else if (local->child_up[AFR_CHILD_ONE]) { + up_child = AFR_CHILD_ONE; + possible_bad_child = AFR_CHILD_ZERO; + } + + GF_ASSERT(up_child != AFR_CHILD_UNKNOWN); + + /* Query the up_child to see if it blames the down one. */ + xdata_req = dict_new(); + if (!xdata_req) + goto out; + + pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS); + if (!pending) + goto out; + + ret = afr_set_pending_dict(priv, xdata_req, pending); + if (ret < 0) + goto out; + + if (local->fd) { + ret = syncop_fxattrop(priv->children[up_child], local->fd, GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp, NULL); - if (ret || !xdata_rsp) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "gfid:%s: Failed xattrop on %s.", - uuid_utoa (local->inode->gfid), - priv->pending_key[THIN_ARBITER_BRICK_INDEX]); - op_errno = -ret; - goto unlock; - } - - if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, up_child)) { - read_subvol = up_child; - } else { - gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB, - "Failing read for gfid %s since good brick %s is down", - uuid_utoa (local->inode->gfid), - priv->children[possible_bad_child]->name); - op_errno = EIO; - } + } else { + ret = syncop_xattrop(priv->children[up_child], &local->loc, + GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp, + NULL); + } + if (ret || !xdata_rsp) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed xattrop for gfid %s on %s", + uuid_utoa(local->inode->gfid), priv->children[up_child]->name); + op_errno = -ret; + goto out; + } + + if (afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, + possible_bad_child)) { + read_subvol = up_child; + goto out; + } + dict_unref(xdata_rsp); + /* Query thin-arbiter to see if it blames any data brick. */ + ret = afr_fill_ta_loc(this, &loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to populate thin-arbiter loc for: %s.", loc.name); + goto out; + } + flock.l_type = F_WRLCK; /*start and length are already zero. 
*/ + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_MODIFY, &loc, F_SETLKW, &flock, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "gfid:%s: Failed to get AFR_TA_DOM_MODIFY lock on %s.", + uuid_utoa(local->inode->gfid), + priv->pending_key[THIN_ARBITER_BRICK_INDEX]); + op_errno = -ret; + goto out; + } + + ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, + GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp, + NULL); + if (ret || !xdata_rsp) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "gfid:%s: Failed xattrop on %s.", uuid_utoa(local->inode->gfid), + priv->pending_key[THIN_ARBITER_BRICK_INDEX]); + op_errno = -ret; + goto unlock; + } + + if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, up_child)) { + read_subvol = up_child; + } else { + gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB, + "Failing read for gfid %s since good brick %s is down", + uuid_utoa(local->inode->gfid), + priv->children[possible_bad_child]->name); + op_errno = EIO; + } unlock: - flock.l_type = F_UNLCK; - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_MODIFY, &loc, F_SETLK, &flock, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "gfid:%s: Failed to unlock AFR_TA_DOM_MODIFY lock on " - "%s.", uuid_utoa (local->inode->gfid), - priv->pending_key[THIN_ARBITER_BRICK_INDEX]); - } + flock.l_type = F_UNLCK; + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_MODIFY, &loc, F_SETLK, &flock, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "gfid:%s: Failed to unlock AFR_TA_DOM_MODIFY lock on " + "%s.", + uuid_utoa(local->inode->gfid), + priv->pending_key[THIN_ARBITER_BRICK_INDEX]); + } out: - if (xdata_req) - dict_unref(xdata_req); - if (xdata_rsp) - dict_unref(xdata_rsp); - if (pending) - afr_matrix_cleanup (pending, priv->child_count); - loc_wipe (&loc); - - if (read_subvol == -1) { - local->op_ret = -1; - local->op_errno = op_errno; - } - afr_read_txn_wind (frame, this, read_subvol); - return ret; + if (xdata_req) + dict_unref(xdata_req); + if (xdata_rsp) + dict_unref(xdata_rsp); + if (pending) + afr_matrix_cleanup(pending, priv->child_count); + loc_wipe(&loc); + + if (read_subvol == -1) { + local->op_ret = -1; + local->op_errno = op_errno; + } + afr_read_txn_wind(frame, this, read_subvol); + return ret; } void -afr_ta_read_txn_synctask (call_frame_t *frame, xlator_t *this) +afr_ta_read_txn_synctask(call_frame_t *frame, xlator_t *this) { - call_frame_t *ta_frame = NULL; - afr_local_t *local = NULL; - int ret = 0; - - local = frame->local; - ta_frame = afr_ta_frame_create(this); - if (!ta_frame) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } - ret = synctask_new (this->ctx->env, afr_ta_read_txn, - afr_ta_read_txn_done, ta_frame, frame); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - AFR_MSG_THIN_ARB, "Failed to launch " - "afr_ta_read_txn synctask for gfid %s.", - uuid_utoa(local->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - STACK_DESTROY(ta_frame->root); - goto out; - } - return; + call_frame_t *ta_frame = NULL; + afr_local_t *local = NULL; + int ret = 0; + + local = frame->local; + ta_frame = afr_ta_frame_create(this); + if (!ta_frame) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + ret = synctask_new(this->ctx->env, afr_ta_read_txn, afr_ta_read_txn_done, + ta_frame, frame); + if (ret) { + 
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB, + "Failed to launch " + "afr_ta_read_txn synctask for gfid %s.", + uuid_utoa(local->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + STACK_DESTROY(ta_frame->root); + goto out; + } + return; out: - afr_read_txn_wind (frame, this, -1); + afr_read_txn_wind(frame, this, -1); } int -afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int read_subvol = -1; - inode_t *inode = NULL; - int ret = -1; - int spb_choice = -1; - - local = frame->local; - inode = local->inode; - priv = this->private; - - if (err) { - if (!priv->thin_arbiter_count) - goto readfn; - if (err != EINVAL) - goto readfn; - /* We need to query the good bricks and/or thin-arbiter.*/ - afr_ta_read_txn_synctask (frame, this); - return 0; - } + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int read_subvol = -1; + inode_t *inode = NULL; + int ret = -1; + int spb_choice = -1; + + local = frame->local; + inode = local->inode; + priv = this->private; + + if (err) { + if (!priv->thin_arbiter_count) + goto readfn; + if (err != EINVAL) + goto readfn; + /* We need to query the good bricks and/or thin-arbiter.*/ + afr_ta_read_txn_synctask(frame, this); + return 0; + } - read_subvol = afr_read_subvol_select_by_policy (inode, this, - local->readable, NULL); - if (read_subvol == -1) { - err = EIO; - goto readfn; - } + read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable, + NULL); + if (read_subvol == -1) { + err = EIO; + goto readfn; + } - if (local->read_attempted[read_subvol]) { - afr_read_txn_next_subvol (frame, this); - return 0; - } + if (local->read_attempted[read_subvol]) { + afr_read_txn_next_subvol(frame, this); + return 0; + } - local->read_attempted[read_subvol] = 1; + local->read_attempted[read_subvol] = 1; readfn: - if (read_subvol == -1) { - ret = afr_inode_split_brain_choice_get (inode, this, - &spb_choice); - if ((ret == 0) && spb_choice >= 0) - read_subvol = spb_choice; - } - - if (read_subvol == -1) { - AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN (-1, err); - } - afr_read_txn_wind (frame, this, read_subvol); - - return 0; + if (read_subvol == -1) { + ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice); + if ((ret == 0) && spb_choice >= 0) + read_subvol = spb_choice; + } + + if (read_subvol == -1) { + AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err); + } + afr_read_txn_wind(frame, this, read_subvol); + + return 0; } int -afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol) +afr_read_txn_continue(call_frame_t *frame, xlator_t *this, int subvol) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (!local->refreshed) { - local->refreshed = _gf_true; - afr_inode_refresh (frame, this, local->inode, NULL, - afr_read_txn_refresh_done); - } else { - afr_read_txn_next_subvol (frame, this); - } + if (!local->refreshed) { + local->refreshed = _gf_true; + afr_inode_refresh(frame, this, local->inode, NULL, + afr_read_txn_refresh_done); + } else { + afr_read_txn_next_subvol(frame, this); + } - return 0; + return 0; } - /* afr_read_txn_wipe: clean internal variables in @local in order to make @@ -362,24 +358,24 @@ afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol) */ void -afr_read_txn_wipe (call_frame_t *frame, xlator_t *this) +afr_read_txn_wipe(call_frame_t *frame, xlator_t 
*this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - local->readfn = NULL; + local->readfn = NULL; - if (local->inode) - inode_unref (local->inode); + if (local->inode) + inode_unref(local->inode); - for (i = 0; i < priv->child_count; i++) { - local->read_attempted[i] = 0; - local->readable[i] = 0; - } + for (i = 0; i < priv->child_count; i++) { + local->read_attempted[i] = 0; + local->readable[i] = 0; + } } /* @@ -410,92 +406,94 @@ afr_read_txn_wipe (call_frame_t *frame, xlator_t *this) */ int -afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, - afr_read_txn_wind_t readfn, afr_transaction_type type) +afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode, + afr_read_txn_wind_t readfn, afr_transaction_type type) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - unsigned char *data = NULL; - unsigned char *metadata = NULL; - int read_subvol = -1; - int event_generation = 0; - int ret = -1; - - priv = this->private; - local = frame->local; - data = alloca0 (priv->child_count); - metadata = alloca0 (priv->child_count); - - afr_read_txn_wipe (frame, this); - - local->readfn = readfn; - local->inode = inode_ref (inode); - local->is_read_txn = _gf_true; - local->transaction.type = type; - - if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) { - local->op_ret = -1; - local->op_errno = afr_quorum_errno(priv); - goto read; - } - - if (!afr_is_consistent_io_possible (local, priv, &local->op_errno)) { - local->op_ret = -1; - goto read; - } - - if (priv->thin_arbiter_count && - AFR_COUNT (local->child_up, priv->child_count) != - priv->child_count) { - afr_ta_read_txn_synctask (frame, this); - return 0; - } - - ret = afr_inode_read_subvol_get (inode, this, data, metadata, - &event_generation); - if (ret == -1) - /* very first transaction on this inode */ - goto refresh; - AFR_INTERSECT (local->readable, data, metadata, priv->child_count); - - gf_msg_debug (this->name, 0, "%s: generation now vs cached: %d, " - "%d", uuid_utoa (inode->gfid), local->event_generation, - event_generation); - if (afr_is_inode_refresh_reqd (inode, this, local->event_generation, - event_generation)) - /* servers have disconnected / reconnected, and possibly - rebooted, very likely changing the state of freshness - of copies */ - goto refresh; - - read_subvol = afr_read_subvol_select_by_policy (inode, this, - local->readable, NULL); - - if (read_subvol < 0 || read_subvol > priv->child_count) { - gf_msg_debug (this->name, 0, "Unreadable subvolume %d found " - "with event generation %d for gfid %s.", - read_subvol, event_generation, uuid_utoa(inode->gfid)); - goto refresh; - } - - if (!local->child_up[read_subvol]) { - /* should never happen, just in case */ - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_READ_SUBVOL_ERROR, "subvolume %d is the " - "read subvolume in this generation, but is not up", - read_subvol); - goto refresh; - } - - local->read_attempted[read_subvol] = 1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + unsigned char *data = NULL; + unsigned char *metadata = NULL; + int read_subvol = -1; + int event_generation = 0; + int ret = -1; + + priv = this->private; + local = frame->local; + data = alloca0(priv->child_count); + metadata = alloca0(priv->child_count); + + afr_read_txn_wipe(frame, this); + + local->readfn = readfn; + local->inode = 
inode_ref(inode); + local->is_read_txn = _gf_true; + local->transaction.type = type; + + if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) { + local->op_ret = -1; + local->op_errno = afr_quorum_errno(priv); + goto read; + } + + if (!afr_is_consistent_io_possible(local, priv, &local->op_errno)) { + local->op_ret = -1; + goto read; + } + + if (priv->thin_arbiter_count && + AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { + afr_ta_read_txn_synctask(frame, this); + return 0; + } + + ret = afr_inode_read_subvol_get(inode, this, data, metadata, + &event_generation); + if (ret == -1) + /* very first transaction on this inode */ + goto refresh; + AFR_INTERSECT(local->readable, data, metadata, priv->child_count); + + gf_msg_debug(this->name, 0, + "%s: generation now vs cached: %d, " + "%d", + uuid_utoa(inode->gfid), local->event_generation, + event_generation); + if (afr_is_inode_refresh_reqd(inode, this, local->event_generation, + event_generation)) + /* servers have disconnected / reconnected, and possibly + rebooted, very likely changing the state of freshness + of copies */ + goto refresh; + + read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable, + NULL); + + if (read_subvol < 0 || read_subvol > priv->child_count) { + gf_msg_debug(this->name, 0, + "Unreadable subvolume %d found " + "with event generation %d for gfid %s.", + read_subvol, event_generation, uuid_utoa(inode->gfid)); + goto refresh; + } + + if (!local->child_up[read_subvol]) { + /* should never happen, just in case */ + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR, + "subvolume %d is the " + "read subvolume in this generation, but is not up", + read_subvol); + goto refresh; + } + + local->read_attempted[read_subvol] = 1; read: - afr_read_txn_wind (frame, this, read_subvol); + afr_read_txn_wind(frame, this, read_subvol); - return 0; + return 0; refresh: - afr_inode_refresh (frame, this, inode, NULL, afr_read_txn_refresh_done); + afr_inode_refresh(frame, this, inode, NULL, afr_read_txn_refresh_done); - return 0; + return 0; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index be5dd327c9d..c48c47683c3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "afr.h" #include "afr-self-heal.h" #include "byte-order.h" @@ -17,757 +16,767 @@ #include "events.h" void -afr_heal_synctask (xlator_t *this, afr_local_t *local); +afr_heal_synctask(xlator_t *this, afr_local_t *local); int -afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, - inode_t *inode, struct afr_reply *replies, - int source, unsigned char *sources, void *gfid) -{ - afr_private_t *priv = NULL; - call_frame_t *frame = NULL; - afr_local_t *local = NULL; - unsigned char *wind_on = NULL; - ia_type_t ia_type = IA_INVAL; - dict_t *xdata = NULL; - loc_t loc = {0, }; - int ret = 0; - int i = 0; - - priv = this->private; - wind_on = alloca0 (priv->child_count); - ia_type = replies[source].poststat.ia_type; - if ((ia_type == IA_INVAL) && - (AFR_COUNT(sources, priv->child_count) == priv->child_count)) { - /* If a file is present on some bricks of the replica but parent - * dir does not have pending xattrs, all bricks are sources and - * the 'source' we selected earlier might be one where the file - * is not actually present. 
Hence check if file is present in - * any of the sources.*/ - for (i = 0; i < priv->child_count; i++) { - if (i == source) - continue; - if (sources[i] && replies[i].valid && - replies[i].op_ret == 0) { - ia_type = replies[i].poststat.ia_type; - break; - } - } - } - - /* gfid heal on those subvolumes that do not have gfid associated - * with the inode and update those replies. - */ +afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name, + inode_t *inode, struct afr_reply *replies, int source, + unsigned char *sources, void *gfid) +{ + afr_private_t *priv = NULL; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + unsigned char *wind_on = NULL; + ia_type_t ia_type = IA_INVAL; + dict_t *xdata = NULL; + loc_t loc = { + 0, + }; + int ret = 0; + int i = 0; + + priv = this->private; + wind_on = alloca0(priv->child_count); + ia_type = replies[source].poststat.ia_type; + if ((ia_type == IA_INVAL) && + (AFR_COUNT(sources, priv->child_count) == priv->child_count)) { + /* If a file is present on some bricks of the replica but parent + * dir does not have pending xattrs, all bricks are sources and + * the 'source' we selected earlier might be one where the file + * is not actually present. Hence check if file is present in + * any of the sources.*/ for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret != 0) - continue; - if (!gf_uuid_is_null (replies[i].poststat.ia_gfid) || - replies[i].poststat.ia_type != ia_type) - continue; - - wind_on[i] = 1; + if (i == source) + continue; + if (sources[i] && replies[i].valid && replies[i].op_ret == 0) { + ia_type = replies[i].poststat.ia_type; + break; + } } + } - if (AFR_COUNT(wind_on, priv->child_count) == 0) - return 0; + /* gfid heal on those subvolumes that do not have gfid associated + * with the inode and update those replies. 
+ */ + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; + if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) || + replies[i].poststat.ia_type != ia_type) + continue; - xdata = dict_new (); - if (!xdata) { - ret = -ENOMEM; - goto out; - } - - ret = dict_set_gfuuid (xdata, "gfid-req", gfid, true); - if (ret) { - ret = -ENOMEM; - goto out; - } + wind_on[i] = 1; + } - frame = afr_frame_create (this, &ret); - if (!frame) { - ret = -ret; - goto out; - } + if (AFR_COUNT(wind_on, priv->child_count) == 0) + return 0; - local = frame->local; - loc.parent = inode_ref (parent); - gf_uuid_copy (loc.pargfid, parent->gfid); - loc.name = name; - loc.inode = inode_ref (inode); + xdata = dict_new(); + if (!xdata) { + ret = -ENOMEM; + goto out; + } - AFR_ONLIST (wind_on, frame, afr_selfheal_discover_cbk, lookup, - &loc, xdata); + ret = dict_set_gfuuid(xdata, "gfid-req", gfid, true); + if (ret) { + ret = -ENOMEM; + goto out; + } - for (i = 0; i < priv->child_count; i++) { - if (!wind_on[i]) - continue; - afr_reply_wipe (&replies[i]); - afr_reply_copy (&replies[i], &local->replies[i]); - } + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + local = frame->local; + loc.parent = inode_ref(parent); + gf_uuid_copy(loc.pargfid, parent->gfid); + loc.name = name; + loc.inode = inode_ref(inode); + + AFR_ONLIST(wind_on, frame, afr_selfheal_discover_cbk, lookup, &loc, xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!wind_on[i]) + continue; + afr_reply_wipe(&replies[i]); + afr_reply_copy(&replies[i], &local->replies[i]); + } out: - loc_wipe (&loc); - if (frame) - AFR_STACK_DESTROY (frame); - if (xdata) - dict_unref (xdata); + loc_wipe(&loc); + if (frame) + AFR_STACK_DESTROY(frame); + if (xdata) + dict_unref(xdata); - return ret; + return ret; } int -afr_gfid_sbrain_source_from_src_brick (xlator_t *this, - struct afr_reply *replies, - char *src_brick) +afr_gfid_sbrain_source_from_src_brick(xlator_t *this, struct afr_reply *replies, + char *src_brick) { - int i = 0; - afr_private_t *priv = NULL; + int i = 0; + afr_private_t *priv = NULL; - priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) - continue; - if (strcmp (priv->children[i]->name, src_brick) == 0) - return i; - } - return -1; + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (strcmp(priv->children[i]->name, src_brick) == 0) + return i; + } + return -1; } int -afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies, - int child_count) -{ - int j = 0; - int i = 0; - int src = -1; - int votes[child_count]; - - for (i = 0; i < child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) - continue; - - votes[i] = 1; - for (j = i+1; j < child_count; j++) { - if ((!gf_uuid_compare (replies[i].poststat.ia_gfid, - replies[j].poststat.ia_gfid))) - votes[i]++; - if (votes[i] > child_count / 2) { - src = i; - goto out; - } - } +afr_selfheal_gfid_mismatch_by_majority(struct afr_reply *replies, + int child_count) +{ + int j = 0; + int i = 0; + int src = -1; + int votes[child_count]; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + + votes[i] = 1; + for (j = i + 1; j < child_count; j++) { + if ((!gf_uuid_compare(replies[i].poststat.ia_gfid, + replies[j].poststat.ia_gfid))) + votes[i]++; + if (votes[i] > child_count / 2) { + src = i; + goto out; + 
} } + } out: - return src; + return src; } -int afr_gfid_sbrain_source_from_bigger_file (struct afr_reply *replies, - int child_count) +int +afr_gfid_sbrain_source_from_bigger_file(struct afr_reply *replies, + int child_count) { - int i = 0; - int src = -1; - uint64_t size = 0; + int i = 0; + int src = -1; + uint64_t size = 0; - for (i = 0; i < child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) - continue; - if (size < replies[i].poststat.ia_size) { - src = i; - size = replies[i].poststat.ia_size; - } else if (replies[i].poststat.ia_size == size) { - src = -1; - } - } - return src; -} - -int afr_gfid_sbrain_source_from_latest_mtime (struct afr_reply *replies, - int child_count) -{ - int i = 0; - int src = -1; - uint32_t mtime = 0; - uint32_t mtime_nsec = 0; - - for (i = 0; i < child_count; i++) { - if (!replies[i].valid || replies[i].op_ret != 0) - continue; - if ((mtime < replies[i].poststat.ia_mtime) || - ((mtime == replies[i].poststat.ia_mtime) && - (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) { - src = i; - mtime = replies[i].poststat.ia_mtime; - mtime_nsec = replies[i].poststat.ia_mtime_nsec; - } else if ((mtime == replies[i].poststat.ia_mtime) && - (mtime_nsec == replies[i].poststat.ia_mtime_nsec)) { - src = -1; - } + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (size < replies[i].poststat.ia_size) { + src = i; + size = replies[i].poststat.ia_size; + } else if (replies[i].poststat.ia_size == size) { + src = -1; } - return src; + } + return src; } int -afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, - inode_t *inode, uuid_t pargfid, const char *bname, - int src_idx, int child_idx, - unsigned char *locked_on, int *src, dict_t *xdata) -{ - afr_private_t *priv = NULL; - char g1[64] = {0,}; - char g2[64] = {0,}; - int up_count = 0; - int heal_op = -1; - int ret = -1; - char *src_brick = NULL; - - *src = -1; - priv = this->private; - up_count = AFR_COUNT (locked_on, priv->child_count); - if (up_count != priv->child_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, - "All the bricks should be up to resolve the gfid split " - "barin"); - if (xdata) { - ret = dict_set_str (xdata, "gfid-heal-msg", "All the " - "bricks should be up to resolve the" - " gfid split barin"); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, "Error setting" - " gfid-heal-msg dict"); - } - goto out; +afr_gfid_sbrain_source_from_latest_mtime(struct afr_reply *replies, + int child_count) +{ + int i = 0; + int src = -1; + uint32_t mtime = 0; + uint32_t mtime_nsec = 0; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; + if ((mtime < replies[i].poststat.ia_mtime) || + ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) { + src = i; + mtime = replies[i].poststat.ia_mtime; + mtime_nsec = replies[i].poststat.ia_mtime_nsec; + } else if ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec == replies[i].poststat.ia_mtime_nsec)) { + src = -1; } + } + return src; +} +int +afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, + unsigned char *locked_on, int *src, dict_t *xdata) +{ + afr_private_t *priv = NULL; + char g1[64] = { + 0, + }; + char g2[64] = { + 0, + }; + int up_count = 0; + int heal_op = -1; + int ret = -1; + char *src_brick = NULL; + + *src = -1; + priv = 
this->private; + up_count = AFR_COUNT(locked_on, priv->child_count); + if (up_count != priv->child_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "All the bricks should be up to resolve the gfid split " + "barin"); if (xdata) { - ret = dict_get_int32 (xdata, "heal-op", &heal_op); - if (ret) - goto fav_child; - } else { - goto fav_child; + ret = dict_set_str(xdata, "gfid-heal-msg", + "All the " + "bricks should be up to resolve the" + " gfid split barin"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, + "Error setting" + " gfid-heal-msg dict"); } + goto out; + } - switch (heal_op) { + if (xdata) { + ret = dict_get_int32(xdata, "heal-op", &heal_op); + if (ret) + goto fav_child; + } else { + goto fav_child; + } + + switch (heal_op) { case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: - *src = afr_gfid_sbrain_source_from_bigger_file (replies, - priv->child_count); - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "No bigger file"); - if (xdata) { - ret = dict_set_str (xdata, "gfid-heal-msg", - "No bigger file"); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, "Error" - " setting gfid-heal-msg dict"); - } + *src = afr_gfid_sbrain_source_from_bigger_file(replies, + priv->child_count); + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "No bigger file"); + if (xdata) { + ret = dict_set_str(xdata, "gfid-heal-msg", + "No bigger file"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error" + " setting gfid-heal-msg dict"); } - break; + } + break; case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME: - *src = afr_gfid_sbrain_source_from_latest_mtime (replies, - priv->child_count); - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "No difference in mtime"); - if (xdata) { - ret = dict_set_str (xdata, "gfid-heal-msg", - "No difference in mtime"); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, "Error" - "setting gfid-heal-msg dict"); - } + *src = afr_gfid_sbrain_source_from_latest_mtime(replies, + priv->child_count); + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "No difference in mtime"); + if (xdata) { + ret = dict_set_str(xdata, "gfid-heal-msg", + "No difference in mtime"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error" + "setting gfid-heal-msg dict"); } - break; + } + break; case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: - ret = dict_get_str (xdata, "child-name", &src_brick); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "Error getting the source " - "brick"); - break; - } - *src = afr_gfid_sbrain_source_from_src_brick (this, replies, - src_brick); - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "Error getting the source " - "brick"); - if (xdata) { - ret = dict_set_str (xdata, "gfid-heal-msg", - "Error getting the source " - "brick"); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, "Error" - " setting gfid-heal-msg dict"); - } - } + ret = dict_get_str(xdata, "child-name", &src_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Error getting the source " + "brick"); break; + } + *src = afr_gfid_sbrain_source_from_src_brick(this, replies, + src_brick); + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Error getting the source " + "brick"); + if (xdata) { + ret = 
dict_set_str(xdata, "gfid-heal-msg", + "Error getting the source " + "brick"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error" + " setting gfid-heal-msg dict"); + } + } + break; default: - break; - } - goto out; + break; + } + goto out; fav_child: - switch (priv->fav_child_policy) { + switch (priv->fav_child_policy) { case AFR_FAV_CHILD_BY_SIZE: - *src = afr_sh_fav_by_size (this, replies, inode); - break; + *src = afr_sh_fav_by_size(this, replies, inode); + break; case AFR_FAV_CHILD_BY_MTIME: - *src = afr_sh_fav_by_mtime (this, replies, inode); - break; + *src = afr_sh_fav_by_mtime(this, replies, inode); + break; case AFR_FAV_CHILD_BY_CTIME: - *src = afr_sh_fav_by_ctime(this, replies, inode); - break; + *src = afr_sh_fav_by_ctime(this, replies, inode); + break; case AFR_FAV_CHILD_BY_MAJORITY: - if (priv->child_count != 2) - *src = afr_selfheal_gfid_mismatch_by_majority (replies, - priv->child_count); - else - *src = -1; - - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "No majority to resolve " - "gfid split brain"); - } - break; + if (priv->child_count != 2) + *src = afr_selfheal_gfid_mismatch_by_majority( + replies, priv->child_count); + else + *src = -1; + + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "No majority to resolve " + "gfid split brain"); + } + break; default: - break; - } + break; + } out: - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, - "Gfid mismatch detected for /%s>, %s on %s and" - " %s on %s.", uuid_utoa (pargfid), bname, - uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), - priv->children[child_idx]->name, - uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), - priv->children[src_idx]->name); - gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file=" - "/%s>;count=2;child-%d=%s;gfid-%d=%s;" - "child-%d=%s;gfid-%d=%s", this->name, - uuid_utoa (pargfid), bname, child_idx, - priv->children[child_idx]->name, child_idx, - uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), - src_idx, priv->children[src_idx]->name, src_idx, - uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); - return -1; - } - return 0; + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Gfid mismatch detected for /%s>, %s on %s and" + " %s on %s.", + uuid_utoa(pargfid), bname, + uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), + priv->children[child_idx]->name, + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2), + priv->children[src_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;type=gfid;file=" + "/%s>;count=2;child-%d=%s;gfid-%d=%s;" + "child-%d=%s;gfid-%d=%s", + this->name, uuid_utoa(pargfid), bname, child_idx, + priv->children[child_idx]->name, child_idx, + uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx, + priv->children[src_idx]->name, src_idx, + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2)); + return -1; + } + return 0; } - int -afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +afr_selfheal_post_op_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - local->op_ret = op_ret; - local->op_errno = op_errno; - syncbarrier_wake (&local->barrier); + local->op_ret = op_ret; + local->op_errno = op_errno; + 
syncbarrier_wake(&local->barrier); - return 0; + return 0; } - int -afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode, - int subvol, dict_t *xattr, dict_t *xdata) +afr_selfheal_post_op(call_frame_t *frame, xlator_t *this, inode_t *inode, + int subvol, dict_t *xattr, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - loc_t loc = {0, }; - int ret = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + loc_t loc = { + 0, + }; + int ret = 0; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - local->op_ret = 0; + local->op_ret = 0; - STACK_WIND (frame, afr_selfheal_post_op_cbk, priv->children[subvol], - priv->children[subvol]->fops->xattrop, &loc, - GF_XATTROP_ADD_ARRAY, xattr, xdata); + STACK_WIND(frame, afr_selfheal_post_op_cbk, priv->children[subvol], + priv->children[subvol]->fops->xattrop, &loc, + GF_XATTROP_ADD_ARRAY, xattr, xdata); - syncbarrier_wait (&local->barrier, 1); - if (local->op_ret < 0) - ret = -local->op_errno; + syncbarrier_wait(&local->barrier, 1); + if (local->op_ret < 0) + ret = -local->op_errno; - loc_wipe (&loc); - local->op_ret = 0; + loc_wipe(&loc); + local->op_ret = 0; - return ret; + return ret; } int -afr_check_stale_error (struct afr_reply *replies, afr_private_t *priv) +afr_check_stale_error(struct afr_reply *replies, afr_private_t *priv) { - int i = 0; - int op_errno = 0; - int tmp_errno = 0; - int stale_count = 0; + int i = 0; + int op_errno = 0; + int tmp_errno = 0; + int stale_count = 0; - for (i = 0; i < priv->child_count; i++) { - tmp_errno = replies[i].op_errno; - if (tmp_errno == ENOENT || tmp_errno == ESTALE) { - op_errno = afr_higher_errno (op_errno, tmp_errno); - stale_count++; - } + for (i = 0; i < priv->child_count; i++) { + tmp_errno = replies[i].op_errno; + if (tmp_errno == ENOENT || tmp_errno == ESTALE) { + op_errno = afr_higher_errno(op_errno, tmp_errno); + stale_count++; } - if (stale_count != priv->child_count) - return -ENOTCONN; - else - return -op_errno; + } + if (stale_count != priv->child_count) + return -ENOTCONN; + else + return -op_errno; } int -afr_sh_generic_fop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *pre, struct iatt *post, - dict_t *xdata) +afr_sh_generic_fop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) { - int i = (long) cookie; - afr_local_t *local = NULL; + int i = (long)cookie; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - local->replies[i].valid = 1; - local->replies[i].op_ret = op_ret; - local->replies[i].op_errno = op_errno; - if (pre) - local->replies[i].prestat = *pre; - if (post) - local->replies[i].poststat = *post; - if (xdata) - local->replies[i].xdata = dict_ref (xdata); + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (pre) + local->replies[i].prestat = *pre; + if (post) + local->replies[i].poststat = *post; + if (xdata) + local->replies[i].xdata = dict_ref(xdata); - syncbarrier_wake (&local->barrier); + syncbarrier_wake(&local->barrier); - return 0; + return 0; } int -afr_selfheal_restore_time (call_frame_t *frame, xlator_t *this, inode_t *inode, - int source, unsigned char *healed_sinks, - struct afr_reply *replies) 
+afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode, + int source, unsigned char *healed_sinks, + struct afr_reply *replies) { - loc_t loc = {0, }; + loc_t loc = { + 0, + }; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - AFR_ONLIST (healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc, - &replies[source].poststat, - (GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME), NULL); + AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc, + &replies[source].poststat, + (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL); - loc_wipe (&loc); + loc_wipe(&loc); - return 0; + return 0; } dict_t * -afr_selfheal_output_xattr (xlator_t *this, gf_boolean_t is_full_crawl, - afr_transaction_type type, int *output_dirty, - int **output_matrix, int subvol, - int **full_heal_mtx_out) -{ - int j = 0; - int idx = 0; - int d_idx = 0; - int ret = 0; - int *raw = 0; - dict_t *xattr = NULL; - afr_private_t *priv = NULL; - - priv = this->private; - idx = afr_index_for_transaction_type (type); - d_idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); - - xattr = dict_new (); - if (!xattr) - return NULL; - - /* clear dirty */ - raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t); - if (!raw) - goto err; - - raw[idx] = hton32 (output_dirty[subvol]); - ret = dict_set_bin (xattr, AFR_DIRTY, raw, - sizeof(int) * AFR_NUM_CHANGE_LOGS); - if (ret) { - GF_FREE (raw); - goto err; - } +afr_selfheal_output_xattr(xlator_t *this, gf_boolean_t is_full_crawl, + afr_transaction_type type, int *output_dirty, + int **output_matrix, int subvol, + int **full_heal_mtx_out) +{ + int j = 0; + int idx = 0; + int d_idx = 0; + int ret = 0; + int *raw = 0; + dict_t *xattr = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + idx = afr_index_for_transaction_type(type); + d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION); + + xattr = dict_new(); + if (!xattr) + return NULL; - /* clear/set pending */ - for (j = 0; j < priv->child_count; j++) { - raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, - gf_afr_mt_int32_t); - if (!raw) - goto err; - - raw[idx] = hton32 (output_matrix[subvol][j]); - if (is_full_crawl) - raw[d_idx] = hton32 (full_heal_mtx_out[subvol][j]); - - ret = dict_set_bin (xattr, priv->pending_key[j], - raw, sizeof(int) * AFR_NUM_CHANGE_LOGS); - if (ret) { - GF_FREE (raw); - goto err; - } - } + /* clear dirty */ + raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t); + if (!raw) + goto err; + + raw[idx] = hton32(output_dirty[subvol]); + ret = dict_set_bin(xattr, AFR_DIRTY, raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) { + GF_FREE(raw); + goto err; + } + + /* clear/set pending */ + for (j = 0; j < priv->child_count; j++) { + raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t); + if (!raw) + goto err; + + raw[idx] = hton32(output_matrix[subvol][j]); + if (is_full_crawl) + raw[d_idx] = hton32(full_heal_mtx_out[subvol][j]); + + ret = dict_set_bin(xattr, priv->pending_key[j], raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) { + GF_FREE(raw); + goto err; + } + } - return xattr; + return xattr; err: - if (xattr) - dict_unref (xattr); - return NULL; + if (xattr) + dict_unref(xattr); + return NULL; } - int -afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, - unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *undid_pending, - afr_transaction_type type, 
struct afr_reply *replies, - unsigned char *locked_on) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int j = 0; - unsigned char *pending = NULL; - int *input_dirty = NULL; - int **input_matrix = NULL; - int **full_heal_mtx_in = NULL; - int **full_heal_mtx_out = NULL; - int *output_dirty = NULL; - int **output_matrix = NULL; - dict_t *xattr = NULL; - dict_t *xdata = NULL; - - priv = this->private; - local = frame->local; - - pending = alloca0 (priv->child_count); - - input_dirty = alloca0 (priv->child_count * sizeof (int)); - input_matrix = ALLOC_MATRIX (priv->child_count, int); - full_heal_mtx_in = ALLOC_MATRIX (priv->child_count, int); - full_heal_mtx_out = ALLOC_MATRIX (priv->child_count, int); - output_dirty = alloca0 (priv->child_count * sizeof (int)); - output_matrix = ALLOC_MATRIX (priv->child_count, int); - - xdata = dict_new (); - if (!xdata) - return -1; +afr_selfheal_undo_pending(call_frame_t *frame, xlator_t *this, inode_t *inode, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, struct afr_reply *replies, + unsigned char *locked_on) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; + int j = 0; + unsigned char *pending = NULL; + int *input_dirty = NULL; + int **input_matrix = NULL; + int **full_heal_mtx_in = NULL; + int **full_heal_mtx_out = NULL; + int *output_dirty = NULL; + int **output_matrix = NULL; + dict_t *xattr = NULL; + dict_t *xdata = NULL; + + priv = this->private; + local = frame->local; + + pending = alloca0(priv->child_count); + + input_dirty = alloca0(priv->child_count * sizeof(int)); + input_matrix = ALLOC_MATRIX(priv->child_count, int); + full_heal_mtx_in = ALLOC_MATRIX(priv->child_count, int); + full_heal_mtx_out = ALLOC_MATRIX(priv->child_count, int); + output_dirty = alloca0(priv->child_count * sizeof(int)); + output_matrix = ALLOC_MATRIX(priv->child_count, int); + + xdata = dict_new(); + if (!xdata) + return -1; - afr_selfheal_extract_xattr (this, replies, type, input_dirty, - input_matrix); - - if (local->need_full_crawl) - afr_selfheal_extract_xattr (this, replies, AFR_DATA_TRANSACTION, - NULL, full_heal_mtx_in); - - for (i = 0; i < priv->child_count; i++) - if (sinks[i] && !healed_sinks[i]) - pending[i] = 1; - - for (i = 0; i < priv->child_count; i++) { - for (j = 0; j < priv->child_count; j++) { - if (pending[j]) { - output_matrix[i][j] = 1; - if (type == AFR_ENTRY_TRANSACTION) - full_heal_mtx_out[i][j] = 1; - } else if (locked_on[j]) { - output_matrix[i][j] = -input_matrix[i][j]; - if (type == AFR_ENTRY_TRANSACTION) - full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j]; - } - } - } - - for (i = 0; i < priv->child_count; i++) { - if (!pending[i]) - output_dirty[i] = -input_dirty[i]; - } - - for (i = 0; i < priv->child_count; i++) { - if (!locked_on[i]) - /* perform post-op only on subvols we had locked - and inspected on. - */ - continue; - if (undid_pending[i]) - /* We already unset the pending xattrs in - * _afr_fav_child_reset_sink_xattrs(). 
*/ - continue; - - xattr = afr_selfheal_output_xattr (this, local->need_full_crawl, - type, output_dirty, - output_matrix, i, - full_heal_mtx_out); - if (!xattr) { - continue; - } - - if ((type == AFR_ENTRY_TRANSACTION) && (priv->esh_granular)) { - if (xdata && - dict_set_int8 (xdata, GF_XATTROP_PURGE_INDEX, 1)) - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_DICT_SET_FAILED, "Failed to set" - " dict value for %s", - GF_XATTROP_PURGE_INDEX); - } + afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix); + + if (local->need_full_crawl) + afr_selfheal_extract_xattr(this, replies, AFR_DATA_TRANSACTION, NULL, + full_heal_mtx_in); - afr_selfheal_post_op (frame, this, inode, i, xattr, xdata); - dict_unref (xattr); - } + for (i = 0; i < priv->child_count; i++) + if (sinks[i] && !healed_sinks[i]) + pending[i] = 1; + + for (i = 0; i < priv->child_count; i++) { + for (j = 0; j < priv->child_count; j++) { + if (pending[j]) { + output_matrix[i][j] = 1; + if (type == AFR_ENTRY_TRANSACTION) + full_heal_mtx_out[i][j] = 1; + } else if (locked_on[j]) { + output_matrix[i][j] = -input_matrix[i][j]; + if (type == AFR_ENTRY_TRANSACTION) + full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j]; + } + } + } - if (xdata) - dict_unref (xdata); + for (i = 0; i < priv->child_count; i++) { + if (!pending[i]) + output_dirty[i] = -input_dirty[i]; + } - return 0; + for (i = 0; i < priv->child_count; i++) { + if (!locked_on[i]) + /* perform post-op only on subvols we had locked + and inspected on. + */ + continue; + if (undid_pending[i]) + /* We already unset the pending xattrs in + * _afr_fav_child_reset_sink_xattrs(). */ + continue; + + xattr = afr_selfheal_output_xattr(this, local->need_full_crawl, type, + output_dirty, output_matrix, i, + full_heal_mtx_out); + if (!xattr) { + continue; + } + + if ((type == AFR_ENTRY_TRANSACTION) && (priv->esh_granular)) { + if (xdata && dict_set_int8(xdata, GF_XATTROP_PURGE_INDEX, 1)) + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_DICT_SET_FAILED, + "Failed to set" + " dict value for %s", + GF_XATTROP_PURGE_INDEX); + } + + afr_selfheal_post_op(frame, this, inode, i, xattr, xdata); + dict_unref(xattr); + } + + if (xdata) + dict_unref(xdata); + + return 0; } void -afr_reply_copy (struct afr_reply *dst, struct afr_reply *src) -{ - dict_t *xdata = NULL; - - dst->valid = src->valid; - dst->op_ret = src->op_ret; - dst->op_errno = src->op_errno; - dst->prestat = src->prestat; - dst->poststat = src->poststat; - dst->preparent = src->preparent; - dst->postparent = src->postparent; - dst->preparent2 = src->preparent2; - dst->postparent2 = src->postparent2; - if (src->xdata) - xdata = dict_ref (src->xdata); - else - xdata = NULL; - if (dst->xdata) - dict_unref (dst->xdata); - dst->xdata = xdata; - if (xdata && dict_get_str_boolean (xdata, "fips-mode-rchecksum", - _gf_false) == _gf_true) { - memcpy (dst->checksum, src->checksum, SHA256_DIGEST_LENGTH); - } else { - memcpy (dst->checksum, src->checksum, MD5_DIGEST_LENGTH); - } - dst->fips_mode_rchecksum = src->fips_mode_rchecksum; +afr_reply_copy(struct afr_reply *dst, struct afr_reply *src) +{ + dict_t *xdata = NULL; + + dst->valid = src->valid; + dst->op_ret = src->op_ret; + dst->op_errno = src->op_errno; + dst->prestat = src->prestat; + dst->poststat = src->poststat; + dst->preparent = src->preparent; + dst->postparent = src->postparent; + dst->preparent2 = src->preparent2; + dst->postparent2 = src->postparent2; + if (src->xdata) + xdata = dict_ref(src->xdata); + else + xdata = NULL; + if (dst->xdata) + dict_unref(dst->xdata); 
+ dst->xdata = xdata; + if (xdata && dict_get_str_boolean(xdata, "fips-mode-rchecksum", + _gf_false) == _gf_true) { + memcpy(dst->checksum, src->checksum, SHA256_DIGEST_LENGTH); + } else { + memcpy(dst->checksum, src->checksum, MD5_DIGEST_LENGTH); + } + dst->fips_mode_rchecksum = src->fips_mode_rchecksum; } void -afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count) +afr_replies_copy(struct afr_reply *dst, struct afr_reply *src, int count) { - int i = 0; + int i = 0; - if (dst == src) - return; + if (dst == src) + return; - for (i = 0; i < count; i++) { - afr_reply_copy (&dst[i], &src[i]); - } + for (i = 0; i < count; i++) { + afr_reply_copy(&dst[i], &src[i]); + } } int -afr_selfheal_fill_dirty (xlator_t *this, int *dirty, int subvol, - int idx, dict_t *xdata) +afr_selfheal_fill_dirty(xlator_t *this, int *dirty, int subvol, int idx, + dict_t *xdata) { - void *pending_raw = NULL; - int pending[3] = {0, }; + void *pending_raw = NULL; + int pending[3] = { + 0, + }; - if (!dirty) - return 0; + if (!dirty) + return 0; - if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw)) - return -1; + if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw)) + return -1; - if (!pending_raw) - return -1; + if (!pending_raw) + return -1; - memcpy (pending, pending_raw, sizeof(pending)); + memcpy(pending, pending_raw, sizeof(pending)); - dirty[subvol] = ntoh32 (pending[idx]); + dirty[subvol] = ntoh32(pending[idx]); - return 0; + return 0; } - int -afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol, - int idx, dict_t *xdata) +afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx, + dict_t *xdata) { - int i = 0; - void *pending_raw = NULL; - int pending[3] = {0, }; - afr_private_t *priv = NULL; + int i = 0; + void *pending_raw = NULL; + int pending[3] = { + 0, + }; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - if (!matrix) - return 0; + if (!matrix) + return 0; - for (i = 0; i < priv->child_count; i++) { - if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw)) - continue; + for (i = 0; i < priv->child_count; i++) { + if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw)) + continue; - if (!pending_raw) - continue; + if (!pending_raw) + continue; - memcpy (pending, pending_raw, sizeof(pending)); + memcpy(pending, pending_raw, sizeof(pending)); - matrix[subvol][i] = ntoh32 (pending[idx]); - } + matrix[subvol][i] = ntoh32(pending[idx]); + } - return 0; + return 0; } - int -afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, - afr_transaction_type type, int *dirty, int **matrix) +afr_selfheal_extract_xattr(xlator_t *this, struct afr_reply *replies, + afr_transaction_type type, int *dirty, int **matrix) { - afr_private_t *priv = NULL; - int i = 0; - dict_t *xdata = NULL; - int idx = -1; + afr_private_t *priv = NULL; + int i = 0; + dict_t *xdata = NULL; + int idx = -1; - idx = afr_index_for_transaction_type (type); + idx = afr_index_for_transaction_type(type); - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret != 0) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; - if (!replies[i].xdata) - continue; + if (!replies[i].xdata) + continue; - xdata = replies[i].xdata; + xdata = replies[i].xdata; - afr_selfheal_fill_dirty (this, dirty, i, idx, xdata); - afr_selfheal_fill_matrix (this, matrix, i, idx, xdata); - } + afr_selfheal_fill_dirty(this, dirty, i, idx, 
xdata); + afr_selfheal_fill_matrix(this, matrix, i, idx, xdata); + } - return 0; + return 0; } /* @@ -777,573 +786,547 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, * This can happen if data was directly modified in the backend or for snapshots */ void -afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources, - struct afr_reply *replies) +afr_mark_largest_file_as_source(xlator_t *this, unsigned char *sources, + struct afr_reply *replies) { - int i = 0; - afr_private_t *priv = NULL; - uint64_t size = 0; + int i = 0; + afr_private_t *priv = NULL; + uint64_t size = 0; - /* Find source with biggest file size */ - priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (!replies[i].valid || replies[i].op_ret != 0) { - sources[i] = 0; - continue; - } - if (size <= replies[i].poststat.ia_size) { - size = replies[i].poststat.ia_size; - } + /* Find source with biggest file size */ + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (!replies[i].valid || replies[i].op_ret != 0) { + sources[i] = 0; + continue; } - - /* Mark sources with less size as not source */ - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (size > replies[i].poststat.ia_size) - sources[i] = 0; + if (size <= replies[i].poststat.ia_size) { + size = replies[i].poststat.ia_size; } + } + + /* Mark sources with less size as not source */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (size > replies[i].poststat.ia_size) + sources[i] = 0; + } } void -afr_mark_latest_mtime_file_as_source (xlator_t *this, unsigned char *sources, - struct afr_reply *replies) -{ - int i = 0; - afr_private_t *priv = NULL; - uint32_t mtime = 0; - uint32_t mtime_nsec = 0; - - priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (!replies[i].valid || replies[i].op_ret != 0) { - sources[i] = 0; - continue; - } - if ((mtime < replies[i].poststat.ia_mtime) || - ((mtime == replies[i].poststat.ia_mtime) && - (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) { - mtime = replies[i].poststat.ia_mtime; - mtime_nsec = replies[i].poststat.ia_mtime_nsec; - } - } - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if ((mtime > replies[i].poststat.ia_mtime) || - ((mtime == replies[i].poststat.ia_mtime) && - (mtime_nsec > replies[i].poststat.ia_mtime_nsec))) { - sources[i] = 0; - } - } +afr_mark_latest_mtime_file_as_source(xlator_t *this, unsigned char *sources, + struct afr_reply *replies) +{ + int i = 0; + afr_private_t *priv = NULL; + uint32_t mtime = 0; + uint32_t mtime_nsec = 0; + + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (!replies[i].valid || replies[i].op_ret != 0) { + sources[i] = 0; + continue; + } + if ((mtime < replies[i].poststat.ia_mtime) || + ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) { + mtime = replies[i].poststat.ia_mtime; + mtime_nsec = replies[i].poststat.ia_mtime_nsec; + } + } + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if ((mtime > replies[i].poststat.ia_mtime) || + ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec > replies[i].poststat.ia_mtime_nsec))) { + sources[i] = 0; + } + } } void -afr_mark_active_sinks (xlator_t *this, unsigned char *sources, - unsigned char *locked_on, unsigned char *sinks) 
+afr_mark_active_sinks(xlator_t *this, unsigned char *sources, + unsigned char *locked_on, unsigned char *sinks) { - int i = 0; - afr_private_t *priv = NULL; + int i = 0; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!sources[i] && locked_on[i]) - sinks[i] = 1; - else - sinks[i] = 0; - } + for (i = 0; i < priv->child_count; i++) { + if (!sources[i] && locked_on[i]) + sinks[i] = 1; + else + sinks[i] = 0; + } } gf_boolean_t -afr_dict_contains_heal_op (call_frame_t *frame) +afr_dict_contains_heal_op(call_frame_t *frame) { - afr_local_t *local = NULL; - dict_t *xdata_req = NULL; - int ret = 0; - int heal_op = -1; + afr_local_t *local = NULL; + dict_t *xdata_req = NULL; + int ret = 0; + int heal_op = -1; - local = frame->local; - xdata_req = local->xdata_req; - ret = dict_get_int32 (xdata_req, "heal-op", &heal_op); - if (ret) - return _gf_false; - if (local->xdata_rsp == NULL) { - local->xdata_rsp = dict_new(); - if (!local->xdata_rsp) - return _gf_true; - } - ret = dict_set_str (local->xdata_rsp, "sh-fail-msg", - "File not in split-brain"); + local = frame->local; + xdata_req = local->xdata_req; + ret = dict_get_int32(xdata_req, "heal-op", &heal_op); + if (ret) + return _gf_false; + if (local->xdata_rsp == NULL) { + local->xdata_rsp = dict_new(); + if (!local->xdata_rsp) + return _gf_true; + } + ret = dict_set_str(local->xdata_rsp, "sh-fail-msg", + "File not in split-brain"); - return _gf_true; + return _gf_true; } gf_boolean_t -afr_can_decide_split_brain_source_sinks (struct afr_reply *replies, - int child_count) +afr_can_decide_split_brain_source_sinks(struct afr_reply *replies, + int child_count) { - int i = 0; + int i = 0; - for (i = 0; i < child_count; i++) - if (replies[i].valid != 1 || replies[i].op_ret != 0) - return _gf_false; + for (i = 0; i < child_count; i++) + if (replies[i].valid != 1 || replies[i].op_ret != 0) + return _gf_false; - return _gf_true; + return _gf_true; } int -afr_mark_split_brain_source_sinks_by_heal_op (call_frame_t *frame, - xlator_t *this, unsigned char *sources, - unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *locked_on, - struct afr_reply *replies, - afr_transaction_type type, int heal_op) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - dict_t *xdata_req = NULL; - dict_t *xdata_rsp = NULL; - int ret = 0; - int i = 0; - char *name = NULL; - int source = -1; - - local = frame->local; - priv = this->private; - xdata_req = local->xdata_req; - - for (i = 0; i < priv->child_count; i++) { - if (locked_on[i]) - if (sources[i] || !sinks[i] || !healed_sinks[i]) { - ret = -1; - goto out; - } - } - if (local->xdata_rsp == NULL) { - local->xdata_rsp = dict_new(); - if (!local->xdata_rsp) { - ret = -1; - goto out; - } - } - xdata_rsp = local->xdata_rsp; - - if (!afr_can_decide_split_brain_source_sinks (replies, - priv->child_count)) { - ret = dict_set_str (xdata_rsp, "sh-fail-msg", - SBRAIN_HEAL_NO_GO_MSG); +afr_mark_split_brain_source_sinks_by_heal_op( + call_frame_t *frame, xlator_t *this, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on, + struct afr_reply *replies, afr_transaction_type type, int heal_op) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + dict_t *xdata_req = NULL; + dict_t *xdata_rsp = NULL; + int ret = 0; + int i = 0; + char *name = NULL; + int source = -1; + + local = frame->local; + priv = this->private; + xdata_req = local->xdata_req; + + for (i = 0; i < 
priv->child_count; i++) { + if (locked_on[i]) + if (sources[i] || !sinks[i] || !healed_sinks[i]) { ret = -1; goto out; - } + } + } + if (local->xdata_rsp == NULL) { + local->xdata_rsp = dict_new(); + if (!local->xdata_rsp) { + ret = -1; + goto out; + } + } + xdata_rsp = local->xdata_rsp; + + if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) { + ret = dict_set_str(xdata_rsp, "sh-fail-msg", SBRAIN_HEAL_NO_GO_MSG); + ret = -1; + goto out; + } - for (i = 0 ; i < priv->child_count; i++) - if (locked_on[i]) - sources[i] = 1; - switch (heal_op) { + for (i = 0; i < priv->child_count; i++) + if (locked_on[i]) + sources[i] = 1; + switch (heal_op) { case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: - if (type == AFR_METADATA_TRANSACTION) { - ret = dict_set_str (xdata_rsp, "sh-fail-msg", - "Use source-brick option to" - " heal metadata split-brain"); - if (!ret) - ret = -1; - goto out; - } - afr_mark_largest_file_as_source (this, sources, replies); - if (AFR_COUNT (sources, priv->child_count) != 1) { - ret = dict_set_str (xdata_rsp, "sh-fail-msg", - "No bigger file"); - if (!ret) - ret = -1; - goto out; - } - break; + if (type == AFR_METADATA_TRANSACTION) { + ret = dict_set_str(xdata_rsp, "sh-fail-msg", + "Use source-brick option to" + " heal metadata split-brain"); + if (!ret) + ret = -1; + goto out; + } + afr_mark_largest_file_as_source(this, sources, replies); + if (AFR_COUNT(sources, priv->child_count) != 1) { + ret = dict_set_str(xdata_rsp, "sh-fail-msg", "No bigger file"); + if (!ret) + ret = -1; + goto out; + } + break; case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME: - if (type == AFR_METADATA_TRANSACTION) { - ret = dict_set_str (xdata_rsp, "sh-fail-msg", - "Use source-brick option to" - " heal metadata split-brain"); - if (!ret) - ret = -1; - goto out; - } - afr_mark_latest_mtime_file_as_source (this, sources, replies); - if (AFR_COUNT (sources, priv->child_count) != 1) { - ret = dict_set_str (xdata_rsp, "sh-fail-msg", - "No difference in mtime"); - if (!ret) - ret = -1; - goto out; - } - break; + if (type == AFR_METADATA_TRANSACTION) { + ret = dict_set_str(xdata_rsp, "sh-fail-msg", + "Use source-brick option to" + " heal metadata split-brain"); + if (!ret) + ret = -1; + goto out; + } + afr_mark_latest_mtime_file_as_source(this, sources, replies); + if (AFR_COUNT(sources, priv->child_count) != 1) { + ret = dict_set_str(xdata_rsp, "sh-fail-msg", + "No difference in mtime"); + if (!ret) + ret = -1; + goto out; + } + break; case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: - ret = dict_get_str (xdata_req, "child-name", &name); - if (ret) - goto out; - source = afr_get_child_index_from_name (this, name); - if (source < 0) { - ret = dict_set_str (xdata_rsp, "sh-fail-msg", - "Invalid brick name"); - if (!ret) - ret = -1; - goto out; - } - if (locked_on[source] != 1) { - ret = dict_set_str (xdata_rsp, "sh-fail-msg", - "Brick is not up"); - if (!ret) - ret = -1; - goto out; - } - memset (sources, 0, sizeof (*sources) * priv->child_count); - sources[source] = 1; - break; - default: - ret = -1; + ret = dict_get_str(xdata_req, "child-name", &name); + if (ret) goto out; - } - for (i = 0 ; i < priv->child_count; i++) { - if (sources[i]) { - source = i; - break; - } - } - sinks[source] = 0; - healed_sinks[source] = 0; - ret = source; + source = afr_get_child_index_from_name(this, name); + if (source < 0) { + ret = dict_set_str(xdata_rsp, "sh-fail-msg", + "Invalid brick name"); + if (!ret) + ret = -1; + goto out; + } + if (locked_on[source] != 1) { + ret = dict_set_str(xdata_rsp, "sh-fail-msg", "Brick 
is not up"); + if (!ret) + ret = -1; + goto out; + } + memset(sources, 0, sizeof(*sources) * priv->child_count); + sources[source] = 1; + break; + default: + ret = -1; + goto out; + } + for (i = 0; i < priv->child_count; i++) { + if (sources[i]) { + source = i; + break; + } + } + sinks[source] = 0; + healed_sinks[source] = 0; + ret = source; out: - if (ret < 0) - memset (sources, 0, sizeof (*sources) * priv->child_count); - return ret; - + if (ret < 0) + memset(sources, 0, sizeof(*sources) * priv->child_count); + return ret; } int -afr_sh_fav_by_majority (xlator_t *this, struct afr_reply *replies, - inode_t *inode) -{ - afr_private_t *priv; - int vote_count = -1; - int fav_child = -1; - int i = 0; - int k = 0; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid == 1) { - gf_msg_debug (this->name, 0, "Child:%s " - "mtime_sec = %ld, size = %lu for gfid %s", - priv->children[i]->name, - replies[i].poststat.ia_mtime, - replies[i].poststat.ia_size, - uuid_utoa (inode->gfid)); - vote_count = 0; - for (k = 0; k < priv->child_count; k++) { - if ((replies[k].poststat.ia_mtime == - replies[i].poststat.ia_mtime) && - (replies[k].poststat.ia_size == - replies[i].poststat.ia_size) - ) { - vote_count++; - } - } - if (vote_count > priv->child_count/2) { - fav_child = i; - break; - } +afr_sh_fav_by_majority(xlator_t *this, struct afr_reply *replies, + inode_t *inode) +{ + afr_private_t *priv; + int vote_count = -1; + int fav_child = -1; + int i = 0; + int k = 0; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid == 1) { + gf_msg_debug(this->name, 0, + "Child:%s " + "mtime_sec = %ld, size = %lu for gfid %s", + priv->children[i]->name, replies[i].poststat.ia_mtime, + replies[i].poststat.ia_size, uuid_utoa(inode->gfid)); + vote_count = 0; + for (k = 0; k < priv->child_count; k++) { + if ((replies[k].poststat.ia_mtime == + replies[i].poststat.ia_mtime) && + (replies[k].poststat.ia_size == + replies[i].poststat.ia_size)) { + vote_count++; } + } + if (vote_count > priv->child_count / 2) { + fav_child = i; + break; + } } - return fav_child; + } + return fav_child; } /* * afr_sh_fav_by_mtime: Choose favorite child by mtime. 
*/ int -afr_sh_fav_by_mtime (xlator_t *this, struct afr_reply *replies, inode_t *inode) -{ - afr_private_t *priv; - int fav_child = -1; - int i = 0; - uint32_t cmp_mtime = 0; - uint32_t cmp_mtime_nsec = 0; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid == 1) { - gf_msg_debug (this->name, 0, "Child:%s " - "mtime = %ld, mtime_nsec = %d for gfid %s", - priv->children[i]->name, - replies[i].poststat.ia_mtime, - replies[i].poststat.ia_mtime_nsec, - uuid_utoa (inode->gfid)); - if (replies[i].poststat.ia_mtime > cmp_mtime) { - cmp_mtime = replies[i].poststat.ia_mtime; - cmp_mtime_nsec = - replies[i].poststat.ia_mtime_nsec; - fav_child = i; - } else if ((replies[i].poststat.ia_mtime == cmp_mtime) - && (replies[i].poststat.ia_mtime_nsec > - cmp_mtime_nsec)) { - cmp_mtime = replies[i].poststat.ia_mtime; - cmp_mtime_nsec = - replies[i].poststat.ia_mtime_nsec; - fav_child = i; - } - } - } - return fav_child; +afr_sh_fav_by_mtime(xlator_t *this, struct afr_reply *replies, inode_t *inode) +{ + afr_private_t *priv; + int fav_child = -1; + int i = 0; + uint32_t cmp_mtime = 0; + uint32_t cmp_mtime_nsec = 0; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid == 1) { + gf_msg_debug(this->name, 0, + "Child:%s " + "mtime = %ld, mtime_nsec = %d for gfid %s", + priv->children[i]->name, replies[i].poststat.ia_mtime, + replies[i].poststat.ia_mtime_nsec, + uuid_utoa(inode->gfid)); + if (replies[i].poststat.ia_mtime > cmp_mtime) { + cmp_mtime = replies[i].poststat.ia_mtime; + cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec; + fav_child = i; + } else if ((replies[i].poststat.ia_mtime == cmp_mtime) && + (replies[i].poststat.ia_mtime_nsec > cmp_mtime_nsec)) { + cmp_mtime = replies[i].poststat.ia_mtime; + cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec; + fav_child = i; + } + } + } + return fav_child; } /* * afr_sh_fav_by_ctime: Choose favorite child by ctime. 
*/ int -afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies, inode_t *inode) -{ - afr_private_t *priv; - int fav_child = -1; - int i = 0; - uint32_t cmp_ctime = 0; - uint32_t cmp_ctime_nsec = 0; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid == 1) { - gf_msg_debug (this->name, 0, "Child:%s " - "ctime = %ld, ctime_nsec = %d for gfid %s", - priv->children[i]->name, - replies[i].poststat.ia_ctime, - replies[i].poststat.ia_ctime_nsec, - uuid_utoa (inode->gfid)); - if (replies[i].poststat.ia_ctime > cmp_ctime) { - cmp_ctime = replies[i].poststat.ia_ctime; - cmp_ctime_nsec = - replies[i].poststat.ia_ctime_nsec; - fav_child = i; - } else if ((replies[i].poststat.ia_ctime == cmp_ctime) - && (replies[i].poststat.ia_ctime_nsec > - cmp_ctime_nsec)) { - cmp_ctime = replies[i].poststat.ia_ctime; - cmp_ctime_nsec = - replies[i].poststat.ia_ctime_nsec; - fav_child = i; - } - } - } - return fav_child; +afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode) +{ + afr_private_t *priv; + int fav_child = -1; + int i = 0; + uint32_t cmp_ctime = 0; + uint32_t cmp_ctime_nsec = 0; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid == 1) { + gf_msg_debug(this->name, 0, + "Child:%s " + "ctime = %ld, ctime_nsec = %d for gfid %s", + priv->children[i]->name, replies[i].poststat.ia_ctime, + replies[i].poststat.ia_ctime_nsec, + uuid_utoa(inode->gfid)); + if (replies[i].poststat.ia_ctime > cmp_ctime) { + cmp_ctime = replies[i].poststat.ia_ctime; + cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec; + fav_child = i; + } else if ((replies[i].poststat.ia_ctime == cmp_ctime) && + (replies[i].poststat.ia_ctime_nsec > cmp_ctime_nsec)) { + cmp_ctime = replies[i].poststat.ia_ctime; + cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec; + fav_child = i; + } + } + } + return fav_child; } /* * afr_sh_fav_by_size: Choose favorite child by size. 
*/ int -afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode) +afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode) { - afr_private_t *priv; - int fav_child = -1; - int i = 0; - uint64_t cmp_sz = 0; + afr_private_t *priv; + int fav_child = -1; + int i = 0; + uint64_t cmp_sz = 0; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid == 1) { - gf_msg_debug (this->name, 0, "Child:%s " - "file size = %lu for gfid %s", - priv->children[i]->name, - replies[i].poststat.ia_size, - uuid_utoa (inode->gfid)); - if (replies[i].poststat.ia_size > cmp_sz) { - cmp_sz = replies[i].poststat.ia_size; - fav_child = i; - } - } + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid == 1) { + gf_msg_debug(this->name, 0, + "Child:%s " + "file size = %lu for gfid %s", + priv->children[i]->name, replies[i].poststat.ia_size, + uuid_utoa(inode->gfid)); + if (replies[i].poststat.ia_size > cmp_sz) { + cmp_sz = replies[i].poststat.ia_size; + fav_child = i; + } } - return fav_child; + } + return fav_child; } int -afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, - inode_t *inode, char **policy_str) +afr_sh_get_fav_by_policy(xlator_t *this, struct afr_reply *replies, + inode_t *inode, char **policy_str) { - afr_private_t *priv = NULL; - int fav_child = -1; + afr_private_t *priv = NULL; + int fav_child = -1; - priv = this->private; - if (!afr_can_decide_split_brain_source_sinks (replies, - priv->child_count)) { - return -1; - } + priv = this->private; + if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) { + return -1; + } - switch (priv->fav_child_policy) { + switch (priv->fav_child_policy) { case AFR_FAV_CHILD_BY_SIZE: - fav_child = afr_sh_fav_by_size (this, replies, inode); - if (policy_str && fav_child >= 0) { - *policy_str = "SIZE"; - } - break; + fav_child = afr_sh_fav_by_size(this, replies, inode); + if (policy_str && fav_child >= 0) { + *policy_str = "SIZE"; + } + break; case AFR_FAV_CHILD_BY_CTIME: - fav_child = afr_sh_fav_by_ctime (this, replies, inode); - if (policy_str && fav_child >= 0) { - *policy_str = "CTIME"; - } - break; + fav_child = afr_sh_fav_by_ctime(this, replies, inode); + if (policy_str && fav_child >= 0) { + *policy_str = "CTIME"; + } + break; case AFR_FAV_CHILD_BY_MTIME: - fav_child = afr_sh_fav_by_mtime (this, replies, inode); - if (policy_str && fav_child >= 0) { - *policy_str = "MTIME"; - } - break; + fav_child = afr_sh_fav_by_mtime(this, replies, inode); + if (policy_str && fav_child >= 0) { + *policy_str = "MTIME"; + } + break; case AFR_FAV_CHILD_BY_MAJORITY: - fav_child = afr_sh_fav_by_majority (this, replies, inode); - if (policy_str && fav_child >= 0) { - *policy_str = "MAJORITY"; - } - break; + fav_child = afr_sh_fav_by_majority(this, replies, inode); + if (policy_str && fav_child >= 0) { + *policy_str = "MAJORITY"; + } + break; case AFR_FAV_CHILD_NONE: default: - break; - } + break; + } - return fav_child; + return fav_child; } int -afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, - xlator_t *this, - inode_t *inode, - unsigned char *sources, - unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *locked_on, - struct afr_reply *replies, - afr_transaction_type type) -{ - afr_private_t *priv = NULL; - int fav_child = -1; - char mtime_str[256]; - char ctime_str[256]; - char *policy_str = NULL; - struct tm *tm_ptr; - time_t time; - - priv = this->private; - - fav_child = afr_sh_get_fav_by_policy 
(this, replies, inode, - &policy_str); - if (fav_child > priv->child_count - 1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) " - "selected by policy %s.", fav_child, policy_str); - } else if (fav_child >= 0) { - time = replies[fav_child].poststat.ia_mtime; - tm_ptr = localtime (&time); - strftime (mtime_str, sizeof (mtime_str), "%Y-%m-%d %H:%M:%S", - tm_ptr); - time = replies[fav_child].poststat.ia_ctime; - tm_ptr = localtime (&time); - strftime (ctime_str, sizeof (ctime_str), "%Y-%m-%d %H:%M:%S", - tm_ptr); - - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Source %s " - "selected as authentic to resolve conflicting " - "data in file (gfid:%s) by %s (%lu bytes @ %s mtime, " - "%s ctime).", - priv->children[fav_child]->name, - uuid_utoa (inode->gfid), - policy_str, - replies[fav_child].poststat.ia_size, - mtime_str, - ctime_str); - - sources[fav_child] = 1; - sinks[fav_child] = 0; - healed_sinks[fav_child] = 0; - } - return fav_child; +afr_mark_split_brain_source_sinks_by_policy( + call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on, + struct afr_reply *replies, afr_transaction_type type) +{ + afr_private_t *priv = NULL; + int fav_child = -1; + char mtime_str[256]; + char ctime_str[256]; + char *policy_str = NULL; + struct tm *tm_ptr; + time_t time; + + priv = this->private; + + fav_child = afr_sh_get_fav_by_policy(this, replies, inode, &policy_str); + if (fav_child > priv->child_count - 1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY, + "Invalid child (%d) " + "selected by policy %s.", + fav_child, policy_str); + } else if (fav_child >= 0) { + time = replies[fav_child].poststat.ia_mtime; + tm_ptr = localtime(&time); + strftime(mtime_str, sizeof(mtime_str), "%Y-%m-%d %H:%M:%S", tm_ptr); + time = replies[fav_child].poststat.ia_ctime; + tm_ptr = localtime(&time); + strftime(ctime_str, sizeof(ctime_str), "%Y-%m-%d %H:%M:%S", tm_ptr); + + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY, + "Source %s " + "selected as authentic to resolve conflicting " + "data in file (gfid:%s) by %s (%lu bytes @ %s mtime, " + "%s ctime).", + priv->children[fav_child]->name, uuid_utoa(inode->gfid), + policy_str, replies[fav_child].poststat.ia_size, mtime_str, + ctime_str); + + sources[fav_child] = 1; + sinks[fav_child] = 0; + healed_sinks[fav_child] = 0; + } + return fav_child; } gf_boolean_t -afr_is_file_empty_on_all_children (afr_private_t *priv, - struct afr_reply *replies) +afr_is_file_empty_on_all_children(afr_private_t *priv, + struct afr_reply *replies) { - int i = 0; + int i = 0; - for (i = 0; i < priv->child_count; i++) { - if ((!replies[i].valid) || (replies[i].op_ret != 0) || - (replies[i].poststat.ia_size != 0)) - return _gf_false; - } + for (i = 0; i < priv->child_count; i++) { + if ((!replies[i].valid) || (replies[i].op_ret != 0) || + (replies[i].poststat.ia_size != 0)) + return _gf_false; + } - return _gf_true; + return _gf_true; } int -afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources, - unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *locked_on, - struct afr_reply *replies, - afr_transaction_type type) -{ - int source = -1; - int i = 0; - afr_private_t *priv = this->private; - struct iatt stbuf = {0, }; - - if ((AFR_COUNT (locked_on, priv->child_count) < priv->child_count) || - (afr_success_count(replies, 
priv->child_count) < priv->child_count)) - return -1; +afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources, + unsigned char *sinks, + unsigned char *healed_sinks, + unsigned char *locked_on, + struct afr_reply *replies, + afr_transaction_type type) +{ + int source = -1; + int i = 0; + afr_private_t *priv = this->private; + struct iatt stbuf = { + 0, + }; + + if ((AFR_COUNT(locked_on, priv->child_count) < priv->child_count) || + (afr_success_count(replies, priv->child_count) < priv->child_count)) + return -1; - if (type == AFR_DATA_TRANSACTION) { - if (!afr_is_file_empty_on_all_children(priv, replies)) - return -1; - goto mark; - } - - /*For AFR_METADATA_TRANSACTION, metadata must be same on all bricks.*/ - stbuf = replies[0].poststat; - for (i = 1; i < priv->child_count; i++) { - if ((!IA_EQUAL (stbuf, replies[i].poststat, type)) || - (!IA_EQUAL (stbuf, replies[i].poststat, uid)) || - (!IA_EQUAL (stbuf, replies[i].poststat, gid)) || - (!IA_EQUAL (stbuf, replies[i].poststat, prot))) - return -1; - } - for (i = 1; i < priv->child_count; i++) { - if (!afr_xattrs_are_equal (replies[0].xdata, - replies[i].xdata)) - return -1; - } + if (type == AFR_DATA_TRANSACTION) { + if (!afr_is_file_empty_on_all_children(priv, replies)) + return -1; + goto mark; + } + + /*For AFR_METADATA_TRANSACTION, metadata must be same on all bricks.*/ + stbuf = replies[0].poststat; + for (i = 1; i < priv->child_count; i++) { + if ((!IA_EQUAL(stbuf, replies[i].poststat, type)) || + (!IA_EQUAL(stbuf, replies[i].poststat, uid)) || + (!IA_EQUAL(stbuf, replies[i].poststat, gid)) || + (!IA_EQUAL(stbuf, replies[i].poststat, prot))) + return -1; + } + for (i = 1; i < priv->child_count; i++) { + if (!afr_xattrs_are_equal(replies[0].xdata, replies[i].xdata)) + return -1; + } mark: - /* data/metadata is same on all bricks. Pick one of them as source. Rest - * are sinks.*/ - for (i = 0 ; i < priv->child_count; i++) { - if (source == -1) { - source = i; - sources[i] = 1; - sinks[i] = 0; - healed_sinks[i] = 0; - continue; - } - sources[i] = 0; - sinks[i] = 1; - healed_sinks[i] = 1; + /* data/metadata is same on all bricks. Pick one of them as source. Rest + * are sinks.*/ + for (i = 0; i < priv->child_count; i++) { + if (source == -1) { + source = i; + sources[i] = 1; + sinks[i] = 0; + healed_sinks[i] = 0; + continue; } + sources[i] = 0; + sinks[i] = 1; + healed_sinks[i] = 1; + } - return source; + return source; } /* Return a source depending on the type of heal_op, and set sources[source], @@ -1354,171 +1337,156 @@ mark: * sinks[node] are 1. This should be the case if the file is in split-brain. 
*/ int -afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, - inode_t *inode, - unsigned char *sources, - unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *locked_on, - struct afr_reply *replies, - afr_transaction_type type) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - dict_t *xdata_req = NULL; - int heal_op = -1; - int ret = -1; - int source = -1; - - local = frame->local; - priv = this->private; - xdata_req = local->xdata_req; - - source = afr_mark_source_sinks_if_file_empty (this, sources, sinks, - healed_sinks, locked_on, - replies, type); - if (source >= 0) - return source; - - ret = dict_get_int32 (xdata_req, "heal-op", &heal_op); - if (ret) - goto autoheal; - - source = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, - sources, sinks, - healed_sinks, - locked_on, replies, - type, heal_op); +afr_mark_split_brain_source_sinks( + call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on, + struct afr_reply *replies, afr_transaction_type type) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + dict_t *xdata_req = NULL; + int heal_op = -1; + int ret = -1; + int source = -1; + + local = frame->local; + priv = this->private; + xdata_req = local->xdata_req; + + source = afr_mark_source_sinks_if_file_empty( + this, sources, sinks, healed_sinks, locked_on, replies, type); + if (source >= 0) return source; + ret = dict_get_int32(xdata_req, "heal-op", &heal_op); + if (ret) + goto autoheal; + + source = afr_mark_split_brain_source_sinks_by_heal_op( + frame, this, sources, sinks, healed_sinks, locked_on, replies, type, + heal_op); + return source; + autoheal: - /* Automatically heal if fav_child_policy is set. */ - if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) { - source = afr_mark_split_brain_source_sinks_by_policy (frame, - this, - inode, - sources, - sinks, - healed_sinks, - locked_on, - replies, - type); - if (source != -1) { - ret = dict_set_int32 (xdata_req, "fav-child-policy", 1); - if (ret) - return -1; - } + /* Automatically heal if fav_child_policy is set. 
*/ + if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) { + source = afr_mark_split_brain_source_sinks_by_policy( + frame, this, inode, sources, sinks, healed_sinks, locked_on, + replies, type); + if (source != -1) { + ret = dict_set_int32(xdata_req, "fav-child-policy", 1); + if (ret) + return -1; } + } - return source; + return source; } int -_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, - inode_t *inode, int source, - unsigned char *healed_sinks, - unsigned char *undid_pending, - afr_transaction_type type, - unsigned char *locked_on, - struct afr_reply *replies) +_afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this, + inode_t *inode, int source, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, + unsigned char *locked_on, + struct afr_reply *replies) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int *input_dirty = NULL; - int **input_matrix = NULL; - int *output_dirty = NULL; - int **output_matrix = NULL; - dict_t *xattr = NULL; - dict_t *xdata = NULL; - int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int *input_dirty = NULL; + int **input_matrix = NULL; + int *output_dirty = NULL; + int **output_matrix = NULL; + dict_t *xattr = NULL; + dict_t *xdata = NULL; + int i = 0; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - if (!dict_get (local->xdata_req, "fav-child-policy")) - return 0; + if (!dict_get(local->xdata_req, "fav-child-policy")) + return 0; - xdata = dict_new(); - if (!xdata) - return -1; + xdata = dict_new(); + if (!xdata) + return -1; - input_dirty = alloca0 (priv->child_count * sizeof (int)); - input_matrix = ALLOC_MATRIX (priv->child_count, int); - output_dirty = alloca0 (priv->child_count * sizeof (int)); - output_matrix = ALLOC_MATRIX (priv->child_count, int); + input_dirty = alloca0(priv->child_count * sizeof(int)); + input_matrix = ALLOC_MATRIX(priv->child_count, int); + output_dirty = alloca0(priv->child_count * sizeof(int)); + output_matrix = ALLOC_MATRIX(priv->child_count, int); - afr_selfheal_extract_xattr (this, replies, type, input_dirty, - input_matrix); + afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix); - for (i = 0; i < priv->child_count; i++) { - if (i == source || !healed_sinks[i]) - continue; - output_dirty[i] = -input_dirty[i]; - output_matrix[i][source] = -input_matrix[i][source]; - } + for (i = 0; i < priv->child_count; i++) { + if (i == source || !healed_sinks[i]) + continue; + output_dirty[i] = -input_dirty[i]; + output_matrix[i][source] = -input_matrix[i][source]; + } - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i] || !locked_on[i]) - continue; - xattr = afr_selfheal_output_xattr (this, _gf_false, type, - output_dirty, output_matrix, - i, NULL); + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i] || !locked_on[i]) + continue; + xattr = afr_selfheal_output_xattr(this, _gf_false, type, output_dirty, + output_matrix, i, NULL); - afr_selfheal_post_op (frame, this, inode, i, xattr, xdata); + afr_selfheal_post_op(frame, this, inode, i, xattr, xdata); - undid_pending[i] = 1; - dict_unref (xattr); - } + undid_pending[i] = 1; + dict_unref(xattr); + } - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref(xdata); - return 0; + return 0; } gf_boolean_t -afr_does_witness_exist (xlator_t *this, uint64_t *witness) +afr_does_witness_exist(xlator_t *this, uint64_t *witness) { - int i = 0; - afr_private_t *priv = NULL; + int i = 0; + 
afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (witness[i]) - return _gf_true; - } - return _gf_false; + for (i = 0; i < priv->child_count; i++) { + if (witness[i]) + return _gf_true; + } + return _gf_false; } unsigned int -afr_get_quorum_count (afr_private_t *priv) +afr_get_quorum_count(afr_private_t *priv) { - if (priv->quorum_count == AFR_QUORUM_AUTO) { - return priv->child_count/2 + 1; - } else { - return priv->quorum_count; - } + if (priv->quorum_count == AFR_QUORUM_AUTO) { + return priv->child_count / 2 + 1; + } else { + return priv->quorum_count; + } } void -afr_selfheal_post_op_failure_accounting (afr_private_t *priv, char *accused, - unsigned char *sources, - unsigned char *locked_on) +afr_selfheal_post_op_failure_accounting(afr_private_t *priv, char *accused, + unsigned char *sources, + unsigned char *locked_on) { - int i = 0; - unsigned int quorum_count = 0; + int i = 0; + unsigned int quorum_count = 0; - if (AFR_COUNT (sources, priv->child_count) != 0) - return; + if (AFR_COUNT(sources, priv->child_count) != 0) + return; - quorum_count = afr_get_quorum_count (priv); - for (i = 0; i < priv->child_count; i++) { - if ((accused[i] < quorum_count) && locked_on[i]) { - sources[i] = 1; - } + quorum_count = afr_get_quorum_count(priv); + for (i = 0; i < priv->child_count; i++) { + if ((accused[i] < quorum_count) && locked_on[i]) { + sources[i] = 1; } - return; + } + return; } /* @@ -1541,663 +1509,675 @@ afr_selfheal_post_op_failure_accounting (afr_private_t *priv, char *accused, */ int -afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, - struct afr_reply *replies, - afr_transaction_type type, - unsigned char *locked_on, unsigned char *sources, - unsigned char *sinks, uint64_t *witness, - gf_boolean_t *pflag) -{ - afr_private_t *priv = NULL; - int i = 0; - int j = 0; - int *dirty = NULL; /* Denotes if dirty xattr is set */ - int **matrix = NULL;/* Changelog matrix */ - char *accused = NULL;/* Accused others without any self-accusal */ - char *pending = NULL;/* Have pending operations on others */ - char *self_accused = NULL; /* Accused itself */ - int min_participants = -1; - - priv = this->private; - - dirty = alloca0 (priv->child_count * sizeof (int)); - accused = alloca0 (priv->child_count); - pending = alloca0 (priv->child_count); - self_accused = alloca0 (priv->child_count); - matrix = ALLOC_MATRIX(priv->child_count, int); - memset (witness, 0, sizeof (*witness) * priv->child_count); - - /* First construct the pending matrix for further analysis */ - afr_selfheal_extract_xattr (this, replies, type, dirty, matrix); - - if (pflag) { - for (i = 0; i < priv->child_count; i++) { - for (j = 0; j < priv->child_count; j++) - if (matrix[i][j]) - *pflag = _gf_true; - if (*pflag) - break; - } - } - - if (type == AFR_DATA_TRANSACTION) { - min_participants = priv->child_count; - } else { - min_participants = AFR_SH_MIN_PARTICIPANTS; +afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + struct afr_reply *replies, + afr_transaction_type type, unsigned char *locked_on, + unsigned char *sources, unsigned char *sinks, + uint64_t *witness, gf_boolean_t *pflag) +{ + afr_private_t *priv = NULL; + int i = 0; + int j = 0; + int *dirty = NULL; /* Denotes if dirty xattr is set */ + int **matrix = NULL; /* Changelog matrix */ + char *accused = NULL; /* Accused others without any self-accusal */ + char *pending = NULL; /* Have pending operations on others */ + char *self_accused = NULL; /* Accused 
itself */ + int min_participants = -1; + + priv = this->private; + + dirty = alloca0(priv->child_count * sizeof(int)); + accused = alloca0(priv->child_count); + pending = alloca0(priv->child_count); + self_accused = alloca0(priv->child_count); + matrix = ALLOC_MATRIX(priv->child_count, int); + memset(witness, 0, sizeof(*witness) * priv->child_count); + + /* First construct the pending matrix for further analysis */ + afr_selfheal_extract_xattr(this, replies, type, dirty, matrix); + + if (pflag) { + for (i = 0; i < priv->child_count; i++) { + for (j = 0; j < priv->child_count; j++) + if (matrix[i][j]) + *pflag = _gf_true; + if (*pflag) + break; } - if (afr_success_count (replies, - priv->child_count) < min_participants) { - /* Treat this just like locks not being acquired */ - return -ENOTCONN; + } + + if (type == AFR_DATA_TRANSACTION) { + min_participants = priv->child_count; + } else { + min_participants = AFR_SH_MIN_PARTICIPANTS; + } + if (afr_success_count(replies, priv->child_count) < min_participants) { + /* Treat this just like locks not being acquired */ + return -ENOTCONN; + } + + /* short list all self-accused */ + for (i = 0; i < priv->child_count; i++) { + if (matrix[i][i]) + self_accused[i] = 1; + } + + /* Next short list all accused to exclude them from being sources */ + /* Self-accused can't accuse others as they are FOOLs */ + for (i = 0; i < priv->child_count; i++) { + for (j = 0; j < priv->child_count; j++) { + if (matrix[i][j]) { + if (!self_accused[i]) + accused[j] += 1; + if (i != j) + pending[i] += 1; + } } + } - /* short list all self-accused */ + /* Short list all non-accused as sources */ + for (i = 0; i < priv->child_count; i++) { + if (!accused[i] && locked_on[i]) + sources[i] = 1; + else + sources[i] = 0; + } + + /* Everyone accused by non-self-accused sources are sinks */ + memset(sinks, 0, priv->child_count); + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (self_accused[i]) + continue; + for (j = 0; j < priv->child_count; j++) { + if (matrix[i][j]) + sinks[j] = 1; + } + } + + /* For breaking ties provide with number of fops they witnessed */ + + /* + * count the pending fops witnessed from itself to others when it is + * self-accused + */ + for (i = 0; i < priv->child_count; i++) { + if (!self_accused[i]) + continue; + for (j = 0; j < priv->child_count; j++) { + if (i == j) + continue; + witness[i] += matrix[i][j]; + } + } + + if (type == AFR_DATA_TRANSACTION) + afr_selfheal_post_op_failure_accounting(priv, accused, sources, + locked_on); + + /* If no sources, all locked nodes are sinks - split brain */ + if (AFR_COUNT(sources, priv->child_count) == 0) { for (i = 0; i < priv->child_count; i++) { - if (matrix[i][i]) - self_accused[i] = 1; + if (locked_on[i]) + sinks[i] = 1; } + } - /* Next short list all accused to exclude them from being sources */ - /* Self-accused can't accuse others as they are FOOLs */ - for (i = 0; i < priv->child_count; i++) { - for (j = 0; j < priv->child_count; j++) { - if (matrix[i][j]) { - if (!self_accused[i]) - accused[j] += 1; - if (i != j) - pending[i] += 1; - } - } - } - - /* Short list all non-accused as sources */ - for (i = 0; i < priv->child_count; i++) { - if (!accused[i] && locked_on[i]) - sources[i] = 1; - else - sources[i] = 0; - } - - /* Everyone accused by non-self-accused sources are sinks */ - memset (sinks, 0, priv->child_count); + /* One more class of witness similar to dirty in v2 is where no pending + * exists but we have self-accusing markers. 
This can happen in afr-v1 + * if the brick crashes just after doing xattrop on self but + * before xattrop on the other xattrs on the brick in pre-op. */ + if (AFR_COUNT(pending, priv->child_count) == 0) { for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (self_accused[i]) - continue; - for (j = 0; j < priv->child_count; j++) { - if (matrix[i][j]) - sinks[j] = 1; - } + if (self_accused[i]) + witness[i] += matrix[i][i]; } - - /* For breaking ties provide with number of fops they witnessed */ - - /* - * count the pending fops witnessed from itself to others when it is - * self-accused + } else { + /* In afr-v1 if a file is self-accused and has pending + * operations on others then it is similar to 'dirty' in afr-v2. + * Consider such cases as witness. */ for (i = 0; i < priv->child_count; i++) { - if (!self_accused[i]) - continue; - for (j = 0; j < priv->child_count; j++) { - if (i == j) - continue; - witness[i] += matrix[i][j]; - } + if (self_accused[i] && pending[i]) + witness[i] += matrix[i][i]; } + } - if (type == AFR_DATA_TRANSACTION) - afr_selfheal_post_op_failure_accounting (priv, accused, - sources, locked_on); + /* count the number of dirty fops witnessed */ + for (i = 0; i < priv->child_count; i++) + witness[i] += dirty[i]; - /* If no sources, all locked nodes are sinks - split brain */ - if (AFR_COUNT (sources, priv->child_count) == 0) { - for (i = 0; i < priv->child_count; i++) { - if (locked_on[i]) - sinks[i] = 1; - } - } - - /* One more class of witness similar to dirty in v2 is where no pending - * exists but we have self-accusing markers. This can happen in afr-v1 - * if the brick crashes just after doing xattrop on self but - * before xattrop on the other xattrs on the brick in pre-op. */ - if (AFR_COUNT (pending, priv->child_count) == 0) { - for (i = 0; i < priv->child_count; i++) { - if (self_accused[i]) - witness[i] += matrix[i][i]; - } - } else { - /* In afr-v1 if a file is self-accused and has pending - * operations on others then it is similar to 'dirty' in afr-v2. - * Consider such cases as witness. - */ - for (i = 0; i < priv->child_count; i++) { - if (self_accused[i] && pending[i]) - witness[i] += matrix[i][i]; - } - } - - - /* count the number of dirty fops witnessed */ - for (i = 0; i < priv->child_count; i++) - witness[i] += dirty[i]; - - return 0; + return 0; } void -afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, - int source, unsigned char *sources, - unsigned char *healed_sinks) -{ - char *status = NULL; - char *sinks_str = NULL; - char *p = NULL; - char *sources_str = NULL; - char *q = NULL; - afr_private_t *priv = NULL; - gf_loglevel_t loglevel = GF_LOG_NONE; - int i = 0; - - priv = this->private; - sinks_str = alloca0 (priv->child_count * 8); - p = sinks_str; - sources_str = alloca0 (priv->child_count * 8); - q = sources_str; - for (i = 0; i < priv->child_count; i++) { - if (healed_sinks[i]) - p += sprintf (p, "%d ", i); - if (sources[i]) { - if (source == i) { - q += sprintf (q, "[%d] ", i); - } else { - q += sprintf (q, "%d ", i); - } - } - } - - if (ret < 0) { - status = "Failed"; - loglevel = GF_LOG_DEBUG; - } else { - status = "Completed"; - loglevel = GF_LOG_INFO; - } - - gf_msg (this->name, loglevel, 0, - AFR_MSG_SELF_HEAL_INFO, "%s %s selfheal on %s. 
" - "sources=%s sinks=%s", status, type, uuid_utoa (gfid), - sources_str, sinks_str); +afr_log_selfheal(uuid_t gfid, xlator_t *this, int ret, char *type, int source, + unsigned char *sources, unsigned char *healed_sinks) +{ + char *status = NULL; + char *sinks_str = NULL; + char *p = NULL; + char *sources_str = NULL; + char *q = NULL; + afr_private_t *priv = NULL; + gf_loglevel_t loglevel = GF_LOG_NONE; + int i = 0; + + priv = this->private; + sinks_str = alloca0(priv->child_count * 8); + p = sinks_str; + sources_str = alloca0(priv->child_count * 8); + q = sources_str; + for (i = 0; i < priv->child_count; i++) { + if (healed_sinks[i]) + p += sprintf(p, "%d ", i); + if (sources[i]) { + if (source == i) { + q += sprintf(q, "[%d] ", i); + } else { + q += sprintf(q, "%d ", i); + } + } + } + + if (ret < 0) { + status = "Failed"; + loglevel = GF_LOG_DEBUG; + } else { + status = "Completed"; + loglevel = GF_LOG_INFO; + } + + gf_msg(this->name, loglevel, 0, AFR_MSG_SELF_HEAL_INFO, + "%s %s selfheal on %s. " + "sources=%s sinks=%s", + status, type, uuid_utoa(gfid), sources_str, sinks_str); } int -afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *parbuf) -{ - afr_local_t *local = NULL; - int i = -1; - GF_UNUSED int ret = -1; - int8_t need_heal = 1; - - local = frame->local; - i = (long) cookie; - - local->replies[i].valid = 1; - local->replies[i].op_ret = op_ret; - local->replies[i].op_errno = op_errno; - if (buf) - local->replies[i].poststat = *buf; - if (parbuf) - local->replies[i].postparent = *parbuf; - if (xdata) { - local->replies[i].xdata = dict_ref (xdata); - ret = dict_get_int8 (xdata, "link-count", &need_heal); - local->replies[i].need_heal = need_heal; - } else { - local->replies[i].need_heal = need_heal; - } - - syncbarrier_wake (&local->barrier); - - return 0; +afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *parbuf) +{ + afr_local_t *local = NULL; + int i = -1; + GF_UNUSED int ret = -1; + int8_t need_heal = 1; + + local = frame->local; + i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (buf) + local->replies[i].poststat = *buf; + if (parbuf) + local->replies[i].postparent = *parbuf; + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + ret = dict_get_int8(xdata, "link-count", &need_heal); + local->replies[i].need_heal = need_heal; + } else { + local->replies[i].need_heal = need_heal; + } + + syncbarrier_wake(&local->barrier); + + return 0; } - inode_t * -afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent, - const char *name, struct afr_reply *replies, - unsigned char *lookup_on, dict_t *xattr) -{ - loc_t loc = {0, }; - dict_t *xattr_req = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - inode_t *inode = NULL; - - local = frame->local; - priv = frame->this->private; - - xattr_req = dict_new (); - if (!xattr_req) - return NULL; +afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent, + const char *name, struct afr_reply *replies, + unsigned char *lookup_on, dict_t *xattr) +{ + loc_t loc = { + 0, + }; + dict_t *xattr_req = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + + local = frame->local; + priv = frame->this->private; + + xattr_req = dict_new(); + if (!xattr_req) 
+ return NULL; - if (xattr) - dict_copy (xattr, xattr_req); + if (xattr) + dict_copy(xattr, xattr_req); - if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) { - dict_unref (xattr_req); - return NULL; - } + if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) { + dict_unref(xattr_req); + return NULL; + } - inode = inode_new (parent->table); - if (!inode) { - dict_unref (xattr_req); - return NULL; - } + inode = inode_new(parent->table); + if (!inode) { + dict_unref(xattr_req); + return NULL; + } - loc.parent = inode_ref (parent); - gf_uuid_copy (loc.pargfid, parent->gfid); - loc.name = name; - loc.inode = inode_ref (inode); + loc.parent = inode_ref(parent); + gf_uuid_copy(loc.pargfid, parent->gfid); + loc.name = name; + loc.inode = inode_ref(inode); - AFR_ONLIST (lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, - xattr_req); + AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xattr_req); - afr_replies_copy (replies, local->replies, priv->child_count); + afr_replies_copy(replies, local->replies, priv->child_count); - loc_wipe (&loc); - dict_unref (xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); - return inode; + return inode; } int -afr_selfheal_unlocked_discover_on (call_frame_t *frame, inode_t *inode, - uuid_t gfid, struct afr_reply *replies, - unsigned char *discover_on) +afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, + unsigned char *discover_on) { - loc_t loc = {0, }; - dict_t *xattr_req = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + loc_t loc = { + 0, + }; + dict_t *xattr_req = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = frame->this->private; + local = frame->local; + priv = frame->this->private; - xattr_req = dict_new (); - if (!xattr_req) - return -ENOMEM; + xattr_req = dict_new(); + if (!xattr_req) + return -ENOMEM; - if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) { - dict_unref (xattr_req); - return -ENOMEM; - } + if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) { + dict_unref(xattr_req); + return -ENOMEM; + } - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, gfid); - AFR_ONLIST (discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc, - xattr_req); + AFR_ONLIST(discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xattr_req); - afr_replies_copy (replies, local->replies, priv->child_count); + afr_replies_copy(replies, local->replies, priv->child_count); - loc_wipe (&loc); - dict_unref (xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); - return 0; + return 0; } int -afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode, - uuid_t gfid, struct afr_reply *replies) +afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, + struct afr_reply *replies) { - afr_private_t *priv = NULL; + afr_private_t *priv = NULL; - priv = frame->this->private; + priv = frame->this->private; - return afr_selfheal_unlocked_discover_on (frame, inode, gfid, replies, - priv->child_up); + return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, + priv->child_up); } unsigned int -afr_success_count (struct afr_reply *replies, unsigned int count) +afr_success_count(struct afr_reply *replies, unsigned int count) { - int i = 0; - unsigned int success = 0; + int i = 0; + unsigned int success = 0; - for (i = 0; i < count; i++) - if (replies[i].valid && replies[i].op_ret == 0) 
- success++; - return success; + for (i = 0; i < count; i++) + if (replies[i].valid && replies[i].op_ret == 0) + success++; + return success; } int -afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +afr_selfheal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - afr_local_t *local = NULL; - int i = 0; + afr_local_t *local = NULL; + int i = 0; - local = frame->local; - i = (long) cookie; + local = frame->local; + i = (long)cookie; - local->replies[i].valid = 1; - local->replies[i].op_ret = op_ret; - local->replies[i].op_errno = op_errno; + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; - syncbarrier_wake (&local->barrier); + syncbarrier_wake(&local->barrier); - return 0; + return 0; } - int -afr_locked_fill (call_frame_t *frame, xlator_t *this, - unsigned char *locked_on) +afr_locked_fill(call_frame_t *frame, xlator_t *this, unsigned char *locked_on) { - int i = 0; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int count = 0; + int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int count = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].valid && local->replies[i].op_ret == 0) { - locked_on[i] = 1; - count++; - } else { - locked_on[i] = 0; - } - } + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].valid && local->replies[i].op_ret == 0) { + locked_on[i] = 1; + count++; + } else { + locked_on[i] = 0; + } + } - return count; + return count; } - int -afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, - char *dom, off_t off, size_t size, - unsigned char *locked_on) +afr_selfheal_tryinodelk(call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, off_t off, size_t size, + unsigned char *locked_on) { - loc_t loc = {0,}; - struct gf_flock flock = {0, }; + loc_t loc = { + 0, + }; + struct gf_flock flock = { + 0, + }; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - flock.l_type = F_WRLCK; - flock.l_start = off; - flock.l_len = size; + flock.l_type = F_WRLCK; + flock.l_start = off; + flock.l_len = size; - AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, - &loc, F_SETLK, &flock, NULL); + AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock, + NULL); - loc_wipe (&loc); + loc_wipe(&loc); - return afr_locked_fill (frame, this, locked_on); + return afr_locked_fill(frame, this, locked_on); } - int -afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, - char *dom, off_t off, size_t size, - unsigned char *locked_on) +afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, off_t off, size_t size, + unsigned char *locked_on) { - loc_t loc = {0,}; - struct gf_flock flock = {0, }; - afr_local_t *local = NULL; - int i = 0; - afr_private_t *priv = NULL; + loc_t loc = { + 0, + }; + struct gf_flock flock = { + 0, + }; + afr_local_t *local = NULL; + int i = 0; + afr_private_t *priv = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - flock.l_type = F_WRLCK; - 
flock.l_start = off; - flock.l_len = size; + flock.l_type = F_WRLCK; + flock.l_start = off; + flock.l_len = size; - AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, - &loc, F_SETLK, &flock, NULL); + AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock, + NULL); - for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].op_ret == -1 && - local->replies[i].op_errno == EAGAIN) { - afr_locked_fill (frame, this, locked_on); - afr_selfheal_uninodelk (frame, this, inode, dom, off, - size, locked_on); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == -1 && + local->replies[i].op_errno == EAGAIN) { + afr_locked_fill(frame, this, locked_on); + afr_selfheal_uninodelk(frame, this, inode, dom, off, size, + locked_on); - AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom, - &loc, F_SETLKW, &flock, NULL); - break; - } - } + AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW, + &flock, NULL); + break; + } + } - loc_wipe (&loc); + loc_wipe(&loc); - return afr_locked_fill (frame, this, locked_on); + return afr_locked_fill(frame, this, locked_on); } static void -afr_get_lock_and_eagain_counts (afr_private_t *priv, struct afr_reply *replies, - int *lock_count, int *eagain_count) -{ - int i = 0; - - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; - if (replies[i].op_ret == 0) { - (*lock_count)++; - } else if (replies[i].op_ret == -1 && - replies[i].op_errno == EAGAIN) { - (*eagain_count)++; - } - } +afr_get_lock_and_eagain_counts(afr_private_t *priv, struct afr_reply *replies, + int *lock_count, int *eagain_count) +{ + int i = 0; + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret == 0) { + (*lock_count)++; + } else if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) { + (*eagain_count)++; + } + } } /*Do blocking locks if number of locks acquired is majority and there were some * EAGAINs. 
Useful for odd-way replication*/ int -afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this, - inode_t *inode, char *dom, off_t off, - size_t size, unsigned char *locked_on) +afr_selfheal_tie_breaker_inodelk(call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, off_t off, + size_t size, unsigned char *locked_on) { - loc_t loc = {0,}; - struct gf_flock flock = {0, }; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int lock_count = 0; - int eagain_count = 0; + loc_t loc = { + 0, + }; + struct gf_flock flock = { + 0, + }; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int lock_count = 0; + int eagain_count = 0; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - flock.l_type = F_WRLCK; - flock.l_start = off; - flock.l_len = size; + flock.l_type = F_WRLCK; + flock.l_start = off; + flock.l_len = size; - AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, - &loc, F_SETLK, &flock, NULL); + AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock, + NULL); - afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, - &eagain_count); + afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count, + &eagain_count); - if (lock_count > priv->child_count/2 && eagain_count) { - afr_locked_fill (frame, this, locked_on); - afr_selfheal_uninodelk (frame, this, inode, dom, off, - size, locked_on); + if (lock_count > priv->child_count / 2 && eagain_count) { + afr_locked_fill(frame, this, locked_on); + afr_selfheal_uninodelk(frame, this, inode, dom, off, size, locked_on); - AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom, - &loc, F_SETLKW, &flock, NULL); - } + AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW, + &flock, NULL); + } - loc_wipe (&loc); + loc_wipe(&loc); - return afr_locked_fill (frame, this, locked_on); + return afr_locked_fill(frame, this, locked_on); } int -afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, - char *dom, off_t off, size_t size, - const unsigned char *locked_on) +afr_selfheal_uninodelk(call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, off_t off, size_t size, + const unsigned char *locked_on) { - loc_t loc = {0,}; - struct gf_flock flock = {0, }; + loc_t loc = { + 0, + }; + struct gf_flock flock = { + 0, + }; + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + flock.l_type = F_UNLCK; + flock.l_start = off; + flock.l_len = size; - flock.l_type = F_UNLCK; - flock.l_start = off; - flock.l_len = size; + AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, + F_SETLK, &flock, NULL); - AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, inodelk, - dom, &loc, F_SETLK, &flock, NULL); + loc_wipe(&loc); - loc_wipe (&loc); - - return 0; + return 0; } - int -afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, - char *dom, const char *name, unsigned char *locked_on) +afr_selfheal_tryentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, const char *name, unsigned char *locked_on) { - loc_t loc = {0,}; + loc_t loc = { + 0, + }; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - AFR_ONALL (frame, 
afr_selfheal_lock_cbk, entrylk, dom, - &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); - loc_wipe (&loc); + loc_wipe(&loc); - return afr_locked_fill (frame, this, locked_on); + return afr_locked_fill(frame, this, locked_on); } - int -afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, - char *dom, const char *name, unsigned char *locked_on) +afr_selfheal_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, const char *name, unsigned char *locked_on) { - loc_t loc = {0,}; - afr_local_t *local = NULL; - int i = 0; - afr_private_t *priv = NULL; + loc_t loc = { + 0, + }; + afr_local_t *local = NULL; + int i = 0; + afr_private_t *priv = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, - name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); - for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].op_ret == -1 && - local->replies[i].op_errno == EAGAIN) { - afr_locked_fill (frame, this, locked_on); - afr_selfheal_unentrylk (frame, this, inode, dom, name, - locked_on, NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == -1 && + local->replies[i].op_errno == EAGAIN) { + afr_locked_fill(frame, this, locked_on); + afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on, + NULL); - AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom, - &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); - break; - } - } + AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name, + ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + break; + } + } - loc_wipe (&loc); + loc_wipe(&loc); - return afr_locked_fill (frame, this, locked_on); + return afr_locked_fill(frame, this, locked_on); } int -afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this, - inode_t *inode, char *dom, const char *name, - unsigned char *locked_on) +afr_selfheal_tie_breaker_entrylk(call_frame_t *frame, xlator_t *this, + inode_t *inode, char *dom, const char *name, + unsigned char *locked_on) { - loc_t loc = {0,}; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int lock_count = 0; - int eagain_count = 0; + loc_t loc = { + 0, + }; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int lock_count = 0; + int eagain_count = 0; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, - name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); + AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); - afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count, - &eagain_count); + afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count, + &eagain_count); - if (lock_count > priv->child_count/2 && eagain_count) { - afr_locked_fill (frame, this, locked_on); - afr_selfheal_unentrylk (frame, this, inode, dom, name, - locked_on, NULL); + if (lock_count > priv->child_count / 2 && 
eagain_count) { + afr_locked_fill(frame, this, locked_on); + afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on, NULL); - AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom, - &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); - } + AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name, + ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + } - loc_wipe (&loc); + loc_wipe(&loc); - return afr_locked_fill (frame, this, locked_on); + return afr_locked_fill(frame, this, locked_on); } - int -afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, - char *dom, const char *name, unsigned char *locked_on, - dict_t *xdata) +afr_selfheal_unentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, const char *name, unsigned char *locked_on, + dict_t *xdata) { - loc_t loc = {0,}; + loc_t loc = { + 0, + }; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, entrylk, - dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, + name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); - loc_wipe (&loc); + loc_wipe(&loc); - return 0; + return 0; } gf_boolean_t -afr_is_data_set (xlator_t *this, dict_t *xdata) +afr_is_data_set(xlator_t *this, dict_t *xdata) { - return afr_is_pending_set (this, xdata, AFR_DATA_TRANSACTION); + return afr_is_pending_set(this, xdata, AFR_DATA_TRANSACTION); } gf_boolean_t -afr_is_metadata_set (xlator_t *this, dict_t *xdata) +afr_is_metadata_set(xlator_t *this, dict_t *xdata) { - return afr_is_pending_set (this, xdata, AFR_METADATA_TRANSACTION); + return afr_is_pending_set(this, xdata, AFR_METADATA_TRANSACTION); } gf_boolean_t -afr_is_entry_set (xlator_t *this, dict_t *xdata) +afr_is_entry_set(xlator_t *this, dict_t *xdata) { - return afr_is_pending_set (this, xdata, AFR_ENTRY_TRANSACTION); + return afr_is_pending_set(this, xdata, AFR_ENTRY_TRANSACTION); } /* @@ -2210,317 +2190,307 @@ afr_is_entry_set (xlator_t *this, dict_t *xdata) */ int -afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, - uuid_t gfid, inode_t **link_inode, - gf_boolean_t *data_selfheal, - gf_boolean_t *metadata_selfheal, - gf_boolean_t *entry_selfheal) -{ - afr_private_t *priv = NULL; - inode_t *inode = NULL; - int i = 0; - int valid_cnt = 0; - struct iatt first = {0, }; - int first_idx = 0; - struct afr_reply *replies = NULL; - int ret = -1; - - priv = this->private; - - inode = afr_inode_find (this, gfid); - if (!inode) - goto out; +afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + inode_t **link_inode, gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, + gf_boolean_t *entry_selfheal) +{ + afr_private_t *priv = NULL; + inode_t *inode = NULL; + int i = 0; + int valid_cnt = 0; + struct iatt first = { + 0, + }; + int first_idx = 0; + struct afr_reply *replies = NULL; + int ret = -1; + + priv = this->private; + + inode = afr_inode_find(this, gfid); + if (!inode) + goto out; - replies = alloca0 (sizeof (*replies) * priv->child_count); + replies = alloca0(sizeof(*replies) * priv->child_count); - ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies); - if (ret) - goto out; + ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies); + if (ret) + goto out; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; - if (replies[i].op_ret == -1) - continue; - - /* The 
data segment of the changelog can be non-zero to indicate - * the directory needs a full heal. So the check below ensures - * it's not a directory before setting the data_selfheal boolean. - */ - if (data_selfheal && !IA_ISDIR (replies[i].poststat.ia_type) && - afr_is_data_set (this, replies[i].xdata)) - *data_selfheal = _gf_true; - - if (metadata_selfheal && - afr_is_metadata_set (this, replies[i].xdata)) - *metadata_selfheal = _gf_true; - - if (entry_selfheal && afr_is_entry_set (this, replies[i].xdata)) - *entry_selfheal = _gf_true; - - valid_cnt++; - if (valid_cnt == 1) { - first = replies[i].poststat; - first_idx = i; - continue; - } - - if (!IA_EQUAL (first, replies[i].poststat, type)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, - "TYPE mismatch %d vs %d on %s for gfid:%s", - (int) first.ia_type, - (int) replies[i].poststat.ia_type, - priv->children[i]->name, - uuid_utoa (replies[i].poststat.ia_gfid)); - gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;" - "type=file;gfid=%s;" - "ia_type-%d=%s;ia_type-%d=%s", - this->name, - uuid_utoa (replies[i].poststat.ia_gfid), - first_idx, - gf_inode_type_to_str (first.ia_type), i, - gf_inode_type_to_str (replies[i].poststat.ia_type)); - ret = -EIO; - goto out; - } - - if (!IA_EQUAL (first, replies[i].poststat, uid)) { - gf_msg_debug (this->name, 0, - "UID mismatch " - "%d vs %d on %s for gfid:%s", - (int) first.ia_uid, - (int) replies[i].poststat.ia_uid, - priv->children[i]->name, - uuid_utoa (replies[i].poststat.ia_gfid)); - - if (metadata_selfheal) - *metadata_selfheal = _gf_true; - } - - if (!IA_EQUAL (first, replies[i].poststat, gid)) { - gf_msg_debug (this->name, 0, - "GID mismatch " - "%d vs %d on %s for gfid:%s", - (int) first.ia_uid, - (int) replies[i].poststat.ia_uid, - priv->children[i]->name, - uuid_utoa (replies[i].poststat.ia_gfid)); - - if (metadata_selfheal) - *metadata_selfheal = _gf_true; - } - - if (!IA_EQUAL (first, replies[i].poststat, prot)) { - gf_msg_debug (this->name, 0, - "MODE mismatch " - "%d vs %d on %s for gfid:%s", - (int) st_mode_from_ia (first.ia_prot, 0), - (int) st_mode_from_ia - (replies[i].poststat.ia_prot, 0), - priv->children[i]->name, - uuid_utoa (replies[i].poststat.ia_gfid)); - - if (metadata_selfheal) - *metadata_selfheal = _gf_true; - } - - if (IA_ISREG(first.ia_type) && - !IA_EQUAL (first, replies[i].poststat, size)) { - gf_msg_debug (this->name, 0, - "SIZE mismatch " - "%lld vs %lld on %s for gfid:%s", - (long long) first.ia_size, - (long long) replies[i].poststat.ia_size, - priv->children[i]->name, - uuid_utoa (replies[i].poststat.ia_gfid)); - - if (data_selfheal) - *data_selfheal = _gf_true; - } - } - - if (valid_cnt > 0 && link_inode) { - *link_inode = inode_link (inode, NULL, NULL, &first); - if (!*link_inode) { - ret = -EINVAL; - goto out; - } - } else if (valid_cnt < 2) { - ret = afr_check_stale_error (replies, priv); - goto out; - } + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret == -1) + continue; - ret = 0; + /* The data segment of the changelog can be non-zero to indicate + * the directory needs a full heal. So the check below ensures + * it's not a directory before setting the data_selfheal boolean. 
+ */ + if (data_selfheal && !IA_ISDIR(replies[i].poststat.ia_type) && + afr_is_data_set(this, replies[i].xdata)) + *data_selfheal = _gf_true; + + if (metadata_selfheal && afr_is_metadata_set(this, replies[i].xdata)) + *metadata_selfheal = _gf_true; + + if (entry_selfheal && afr_is_entry_set(this, replies[i].xdata)) + *entry_selfheal = _gf_true; + + valid_cnt++; + if (valid_cnt == 1) { + first = replies[i].poststat; + first_idx = i; + continue; + } + + if (!IA_EQUAL(first, replies[i].poststat, type)) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "TYPE mismatch %d vs %d on %s for gfid:%s", + (int)first.ia_type, (int)replies[i].poststat.ia_type, + priv->children[i]->name, + uuid_utoa(replies[i].poststat.ia_gfid)); + gf_event(EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;" + "type=file;gfid=%s;" + "ia_type-%d=%s;ia_type-%d=%s", + this->name, uuid_utoa(replies[i].poststat.ia_gfid), + first_idx, gf_inode_type_to_str(first.ia_type), i, + gf_inode_type_to_str(replies[i].poststat.ia_type)); + ret = -EIO; + goto out; + } + + if (!IA_EQUAL(first, replies[i].poststat, uid)) { + gf_msg_debug(this->name, 0, + "UID mismatch " + "%d vs %d on %s for gfid:%s", + (int)first.ia_uid, (int)replies[i].poststat.ia_uid, + priv->children[i]->name, + uuid_utoa(replies[i].poststat.ia_gfid)); + + if (metadata_selfheal) + *metadata_selfheal = _gf_true; + } + + if (!IA_EQUAL(first, replies[i].poststat, gid)) { + gf_msg_debug(this->name, 0, + "GID mismatch " + "%d vs %d on %s for gfid:%s", + (int)first.ia_uid, (int)replies[i].poststat.ia_uid, + priv->children[i]->name, + uuid_utoa(replies[i].poststat.ia_gfid)); + + if (metadata_selfheal) + *metadata_selfheal = _gf_true; + } + + if (!IA_EQUAL(first, replies[i].poststat, prot)) { + gf_msg_debug(this->name, 0, + "MODE mismatch " + "%d vs %d on %s for gfid:%s", + (int)st_mode_from_ia(first.ia_prot, 0), + (int)st_mode_from_ia(replies[i].poststat.ia_prot, 0), + priv->children[i]->name, + uuid_utoa(replies[i].poststat.ia_gfid)); + + if (metadata_selfheal) + *metadata_selfheal = _gf_true; + } + + if (IA_ISREG(first.ia_type) && + !IA_EQUAL(first, replies[i].poststat, size)) { + gf_msg_debug(this->name, 0, + "SIZE mismatch " + "%lld vs %lld on %s for gfid:%s", + (long long)first.ia_size, + (long long)replies[i].poststat.ia_size, + priv->children[i]->name, + uuid_utoa(replies[i].poststat.ia_gfid)); + + if (data_selfheal) + *data_selfheal = _gf_true; + } + } + + if (valid_cnt > 0 && link_inode) { + *link_inode = inode_link(inode, NULL, NULL, &first); + if (!*link_inode) { + ret = -EINVAL; + goto out; + } + } else if (valid_cnt < 2) { + ret = afr_check_stale_error(replies, priv); + goto out; + } + + ret = 0; out: - if (inode) - inode_unref (inode); - if (replies) - afr_replies_wipe (replies, priv->child_count); + if (inode) + inode_unref(inode); + if (replies) + afr_replies_wipe(replies, priv->child_count); - return ret; + return ret; } - inode_t * -afr_inode_find (xlator_t *this, uuid_t gfid) +afr_inode_find(xlator_t *this, uuid_t gfid) { - inode_table_t *table = NULL; - inode_t *inode = NULL; + inode_table_t *table = NULL; + inode_t *inode = NULL; - table = this->itable; - if (!table) - return NULL; + table = this->itable; + if (!table) + return NULL; - inode = inode_find (table, gfid); - if (inode) - return inode; + inode = inode_find(table, gfid); + if (inode) + return inode; - inode = inode_new (table); - if (!inode) - return NULL; + inode = inode_new(table); + if (!inode) + return NULL; - gf_uuid_copy (inode->gfid, gfid); + gf_uuid_copy(inode->gfid, gfid); - return 
inode; + return inode; } - call_frame_t * -afr_frame_create (xlator_t *this, int32_t *op_errno) +afr_frame_create(xlator_t *this, int32_t *op_errno) { - call_frame_t *frame = NULL; - afr_local_t *local = NULL; - pid_t pid = GF_CLIENT_PID_SELF_HEALD; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + pid_t pid = GF_CLIENT_PID_SELF_HEALD; - frame = create_frame (this, this->ctx->pool); - if (!frame) - return NULL; + frame = create_frame(this, this->ctx->pool); + if (!frame) + return NULL; - local = AFR_FRAME_INIT (frame, (*op_errno)); - if (!local) { - STACK_DESTROY (frame->root); - return NULL; - } + local = AFR_FRAME_INIT(frame, (*op_errno)); + if (!local) { + STACK_DESTROY(frame->root); + return NULL; + } - syncopctx_setfspid (&pid); + syncopctx_setfspid(&pid); - frame->root->pid = pid; + frame->root->pid = pid; - afr_set_lk_owner (frame, this, frame->root); + afr_set_lk_owner(frame, this, frame->root); - return frame; + return frame; } int -afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode, - int source, struct afr_reply *replies, - unsigned char *sources, unsigned char *newentry) +afr_selfheal_newentry_mark(call_frame_t *frame, xlator_t *this, inode_t *inode, + int source, struct afr_reply *replies, + unsigned char *sources, unsigned char *newentry) { - int ret = 0; - int i = 0; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - int **changelog = NULL; + int ret = 0; + int i = 0; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + int **changelog = NULL; - priv = this->private; + priv = this->private; - gf_uuid_copy (inode->gfid, replies[source].poststat.ia_gfid); + gf_uuid_copy(inode->gfid, replies[source].poststat.ia_gfid); - xattr = dict_new(); - if (!xattr) - return -ENOMEM; + xattr = dict_new(); + if (!xattr) + return -ENOMEM; - changelog = afr_mark_pending_changelog (priv, newentry, xattr, - replies[source].poststat.ia_type); + changelog = afr_mark_pending_changelog(priv, newentry, xattr, + replies[source].poststat.ia_type); - if (!changelog) { - ret = -ENOMEM; - goto out; - } + if (!changelog) { + ret = -ENOMEM; + goto out; + } - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - ret |= afr_selfheal_post_op (frame, this, inode, i, xattr, - NULL); - } + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + ret |= afr_selfheal_post_op(frame, this, inode, i, xattr, NULL); + } out: - if (changelog) - afr_matrix_cleanup (changelog, priv->child_count); - if (xattr) - dict_unref (xattr); - return ret; + if (changelog) + afr_matrix_cleanup(changelog, priv->child_count); + if (xattr) + dict_unref(xattr); + return ret; } int -afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid) -{ - int ret = -1; - int entry_ret = 1; - int metadata_ret = 1; - int data_ret = 1; - int or_ret = 0; - inode_t *inode = NULL; - fd_t *fd = NULL; - gf_boolean_t data_selfheal = _gf_false; - gf_boolean_t metadata_selfheal = _gf_false; - gf_boolean_t entry_selfheal = _gf_false; - afr_private_t *priv = NULL; - gf_boolean_t dataheal_enabled = _gf_false; - - priv = this->private; - - ret = gf_string2boolean (priv->data_self_heal, &dataheal_enabled); - if (ret) - goto out; +afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid) +{ + int ret = -1; + int entry_ret = 1; + int metadata_ret = 1; + int data_ret = 1; + int or_ret = 0; + inode_t *inode = NULL; + fd_t *fd = NULL; + gf_boolean_t data_selfheal = _gf_false; + gf_boolean_t metadata_selfheal = _gf_false; + gf_boolean_t entry_selfheal = _gf_false; + 
afr_private_t *priv = NULL; + gf_boolean_t dataheal_enabled = _gf_false; + + priv = this->private; + + ret = gf_string2boolean(priv->data_self_heal, &dataheal_enabled); + if (ret) + goto out; - ret = afr_selfheal_unlocked_inspect (frame, this, gfid, &inode, - &data_selfheal, - &metadata_selfheal, - &entry_selfheal); - if (ret) - goto out; + ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode, + &data_selfheal, &metadata_selfheal, + &entry_selfheal); + if (ret) + goto out; - if (!(data_selfheal || metadata_selfheal || entry_selfheal)) { - ret = 2; - goto out; - } + if (!(data_selfheal || metadata_selfheal || entry_selfheal)) { + ret = 2; + goto out; + } - if (inode->ia_type == IA_IFREG) { - ret = afr_selfheal_data_open (this, inode, &fd); - if (!fd) { - ret = -EIO; - goto out; - } + if (inode->ia_type == IA_IFREG) { + ret = afr_selfheal_data_open(this, inode, &fd); + if (!fd) { + ret = -EIO; + goto out; } + } - if (data_selfheal && dataheal_enabled) - data_ret = afr_selfheal_data (frame, this, fd); + if (data_selfheal && dataheal_enabled) + data_ret = afr_selfheal_data(frame, this, fd); - if (metadata_selfheal && priv->metadata_self_heal) - metadata_ret = afr_selfheal_metadata (frame, this, inode); + if (metadata_selfheal && priv->metadata_self_heal) + metadata_ret = afr_selfheal_metadata(frame, this, inode); - if (entry_selfheal && priv->entry_self_heal) - entry_ret = afr_selfheal_entry (frame, this, inode); + if (entry_selfheal && priv->entry_self_heal) + entry_ret = afr_selfheal_entry(frame, this, inode); - or_ret = (data_ret | metadata_ret | entry_ret); + or_ret = (data_ret | metadata_ret | entry_ret); - if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO) - ret = -EIO; - else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1) - ret = 1; - else if (or_ret < 0) - ret = or_ret; - else - ret = 0; + if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO) + ret = -EIO; + else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1) + ret = 1; + else if (or_ret < 0) + ret = or_ret; + else + ret = 0; out: - if (inode) - inode_unref (inode); - if (fd) - fd_unref (fd); - return ret; + if (inode) + inode_unref(inode); + if (fd) + fd_unref(fd); + return ret; } /* * This is the entry point for healing a given GFID. 
The return values for this @@ -2532,160 +2502,160 @@ out: */ int -afr_selfheal (xlator_t *this, uuid_t gfid) +afr_selfheal(xlator_t *this, uuid_t gfid) { - int ret = -1; - call_frame_t *frame = NULL; - afr_local_t *local = NULL; + int ret = -1; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; - frame = afr_frame_create (this, NULL); - if (!frame) - return ret; + frame = afr_frame_create(this, NULL); + if (!frame) + return ret; - local = frame->local; - local->xdata_req = dict_new(); + local = frame->local; + local->xdata_req = dict_new(); - ret = afr_selfheal_do (frame, this, gfid); + ret = afr_selfheal_do(frame, this, gfid); - if (frame) - AFR_STACK_DESTROY (frame); + if (frame) + AFR_STACK_DESTROY(frame); - return ret; + return ret; } -afr_local_t* -__afr_dequeue_heals (afr_private_t *priv) +afr_local_t * +__afr_dequeue_heals(afr_private_t *priv) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - if (list_empty (&priv->heal_waiting)) - goto none; - if ((priv->background_self_heal_count > 0) && - (priv->healers >= priv->background_self_heal_count)) - goto none; + if (list_empty(&priv->heal_waiting)) + goto none; + if ((priv->background_self_heal_count > 0) && + (priv->healers >= priv->background_self_heal_count)) + goto none; - local = list_entry (priv->heal_waiting.next, afr_local_t, healer); - priv->heal_waiters--; - GF_ASSERT (priv->heal_waiters >= 0); - list_del_init(&local->healer); - list_add(&local->healer, &priv->healing); - priv->healers++; - return local; + local = list_entry(priv->heal_waiting.next, afr_local_t, healer); + priv->heal_waiters--; + GF_ASSERT(priv->heal_waiters >= 0); + list_del_init(&local->healer); + list_add(&local->healer, &priv->healing); + priv->healers++; + return local; none: - gf_msg_debug (THIS->name, 0, "Nothing dequeued. " - "Num healers: %d, Num Waiters: %d", - priv->healers, priv->heal_waiters); - return NULL; + gf_msg_debug(THIS->name, 0, + "Nothing dequeued. 
" + "Num healers: %d, Num Waiters: %d", + priv->healers, priv->heal_waiters); + return NULL; } int -afr_refresh_selfheal_wrap (void *opaque) +afr_refresh_selfheal_wrap(void *opaque) { - call_frame_t *heal_frame = opaque; - afr_local_t *local = heal_frame->local; - int ret = 0; + call_frame_t *heal_frame = opaque; + afr_local_t *local = heal_frame->local; + int ret = 0; - ret = afr_selfheal (heal_frame->this, local->refreshinode->gfid); - return ret; + ret = afr_selfheal(heal_frame->this, local->refreshinode->gfid); + return ret; } int -afr_refresh_heal_done (int ret, call_frame_t *frame, void *opaque) -{ - call_frame_t *heal_frame = opaque; - xlator_t *this = heal_frame->this; - afr_private_t *priv = this->private; - afr_local_t *local = heal_frame->local; - - LOCK (&priv->lock); - { - list_del_init(&local->healer); - priv->healers--; - GF_ASSERT (priv->healers >= 0); - local = __afr_dequeue_heals (priv); - } - UNLOCK (&priv->lock); +afr_refresh_heal_done(int ret, call_frame_t *frame, void *opaque) +{ + call_frame_t *heal_frame = opaque; + xlator_t *this = heal_frame->this; + afr_private_t *priv = this->private; + afr_local_t *local = heal_frame->local; - AFR_STACK_DESTROY (heal_frame); + LOCK(&priv->lock); + { + list_del_init(&local->healer); + priv->healers--; + GF_ASSERT(priv->healers >= 0); + local = __afr_dequeue_heals(priv); + } + UNLOCK(&priv->lock); - if (local) - afr_heal_synctask (this, local); - return 0; + AFR_STACK_DESTROY(heal_frame); + if (local) + afr_heal_synctask(this, local); + return 0; } void -afr_heal_synctask (xlator_t *this, afr_local_t *local) +afr_heal_synctask(xlator_t *this, afr_local_t *local) { - int ret = 0; - call_frame_t *heal_frame = NULL; + int ret = 0; + call_frame_t *heal_frame = NULL; - heal_frame = local->heal_frame; - ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap, - afr_refresh_heal_done, heal_frame, heal_frame); - if (ret < 0) - /* Heal not launched. Will be queued when the next inode - * refresh happens and shd hasn't healed it yet. */ - afr_refresh_heal_done (ret, heal_frame, heal_frame); + heal_frame = local->heal_frame; + ret = synctask_new(this->ctx->env, afr_refresh_selfheal_wrap, + afr_refresh_heal_done, heal_frame, heal_frame); + if (ret < 0) + /* Heal not launched. Will be queued when the next inode + * refresh happens and shd hasn't healed it yet. 
*/ + afr_refresh_heal_done(ret, heal_frame, heal_frame); } gf_boolean_t -afr_throttled_selfheal (call_frame_t *frame, xlator_t *this) -{ - gf_boolean_t can_heal = _gf_true; - afr_private_t *priv = this->private; - afr_local_t *local = frame->local; - - LOCK (&priv->lock); - { - if ((priv->background_self_heal_count > 0) && - (priv->heal_wait_qlen + priv->background_self_heal_count) > - (priv->heal_waiters + priv->healers)) { - list_add_tail(&local->healer, &priv->heal_waiting); - priv->heal_waiters++; - local = __afr_dequeue_heals (priv); - } else { - can_heal = _gf_false; - } - } - UNLOCK (&priv->lock); - - if (can_heal) { - if (local) - afr_heal_synctask (this, local); - else - gf_msg_debug (this->name, 0, "Max number of heals are " - "pending, background self-heal rejected."); +afr_throttled_selfheal(call_frame_t *frame, xlator_t *this) +{ + gf_boolean_t can_heal = _gf_true; + afr_private_t *priv = this->private; + afr_local_t *local = frame->local; + + LOCK(&priv->lock); + { + if ((priv->background_self_heal_count > 0) && + (priv->heal_wait_qlen + priv->background_self_heal_count) > + (priv->heal_waiters + priv->healers)) { + list_add_tail(&local->healer, &priv->heal_waiting); + priv->heal_waiters++; + local = __afr_dequeue_heals(priv); + } else { + can_heal = _gf_false; } + } + UNLOCK(&priv->lock); + + if (can_heal) { + if (local) + afr_heal_synctask(this, local); + else + gf_msg_debug(this->name, 0, + "Max number of heals are " + "pending, background self-heal rejected."); + } - return can_heal; + return can_heal; } int -afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources, - afr_transaction_type type) +afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, + afr_transaction_type type) { - int source = -1; - int i = 0; + int source = -1; + int i = 0; - /* Give preference to local child to save on bandwidth */ - for (i = 0; i < priv->child_count; i++) { - if (priv->local[i] && sources[i]) { - if ((type == AFR_DATA_TRANSACTION) && - AFR_IS_ARBITER_BRICK (priv, i)) - continue; + /* Give preference to local child to save on bandwidth */ + for (i = 0; i < priv->child_count; i++) { + if (priv->local[i] && sources[i]) { + if ((type == AFR_DATA_TRANSACTION) && AFR_IS_ARBITER_BRICK(priv, i)) + continue; - source = i; - goto out; - } + source = i; + goto out; } + } - for (i = 0; i < priv->child_count; i++) { - if (sources[i]) { - source = i; - goto out; - } + for (i = 0; i < priv->child_count; i++) { + if (sources[i]) { + source = i; + goto out; } + } out: - return source; + return source; } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index c83ef0b7e30..a477fae8039 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. 
*/ - #include "afr.h" #include "afr-self-heal.h" #include "byte-order.h" @@ -17,596 +16,571 @@ #include "events.h" enum { - AFR_SELFHEAL_DATA_FULL = 0, - AFR_SELFHEAL_DATA_DIFF, + AFR_SELFHEAL_DATA_FULL = 0, + AFR_SELFHEAL_DATA_DIFF, }; - #define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size)) static int -__checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, uint32_t weak, uint8_t *strong, - dict_t *xdata) +__checksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, uint32_t weak, uint8_t *strong, dict_t *xdata) { - afr_local_t *local = NULL; - struct afr_reply *replies = NULL; - int i = (long) cookie; - - local = frame->local; - replies = local->replies; - - replies[i].valid = 1; - replies[i].op_ret = op_ret; - replies[i].op_errno = op_errno; - if (xdata) { - replies[i].buf_has_zeroes = dict_get_str_boolean (xdata, - "buf-has-zeroes", _gf_false); - replies[i].fips_mode_rchecksum = dict_get_str_boolean (xdata, - "fips-mode-rchecksum", _gf_false); - } - if (strong) { - if (replies[i].fips_mode_rchecksum) { - memcpy (local->replies[i].checksum, strong, - SHA256_DIGEST_LENGTH); - } else { - memcpy (local->replies[i].checksum, strong, - MD5_DIGEST_LENGTH); - } + afr_local_t *local = NULL; + struct afr_reply *replies = NULL; + int i = (long)cookie; + + local = frame->local; + replies = local->replies; + + replies[i].valid = 1; + replies[i].op_ret = op_ret; + replies[i].op_errno = op_errno; + if (xdata) { + replies[i].buf_has_zeroes = dict_get_str_boolean( + xdata, "buf-has-zeroes", _gf_false); + replies[i].fips_mode_rchecksum = dict_get_str_boolean( + xdata, "fips-mode-rchecksum", _gf_false); + } + if (strong) { + if (replies[i].fips_mode_rchecksum) { + memcpy(local->replies[i].checksum, strong, SHA256_DIGEST_LENGTH); + } else { + memcpy(local->replies[i].checksum, strong, MD5_DIGEST_LENGTH); } + } - syncbarrier_wake (&local->barrier); - return 0; + syncbarrier_wake(&local->barrier); + return 0; } static gf_boolean_t -__afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd, - int source, unsigned char *healed_sinks, - off_t offset, size_t size, - struct iatt *poststat) +__afr_can_skip_data_block_heal(call_frame_t *frame, xlator_t *this, fd_t *fd, + int source, unsigned char *healed_sinks, + off_t offset, size_t size, struct iatt *poststat) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - unsigned char *wind_subvols = NULL; - gf_boolean_t checksum_match = _gf_true; - struct afr_reply *replies = NULL; - dict_t *xdata = NULL; - int i = 0; - - priv = this->private; - local = frame->local; - replies = local->replies; - - xdata = dict_new(); - if (!xdata) - goto out; - if (dict_set_int32 (xdata, "check-zero-filled", 1)) { - dict_unref (xdata); - goto out; - } + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + unsigned char *wind_subvols = NULL; + gf_boolean_t checksum_match = _gf_true; + struct afr_reply *replies = NULL; + dict_t *xdata = NULL; + int i = 0; + + priv = this->private; + local = frame->local; + replies = local->replies; + + xdata = dict_new(); + if (!xdata) + goto out; + if (dict_set_int32(xdata, "check-zero-filled", 1)) { + dict_unref(xdata); + goto out; + } + + wind_subvols = alloca0(priv->child_count); + for (i = 0; i < priv->child_count; i++) { + if (i == source || healed_sinks[i]) + wind_subvols[i] = 1; + } + + AFR_ONLIST(wind_subvols, frame, __checksum_cbk, rchecksum, fd, offset, size, + xdata); + if (xdata) + dict_unref(xdata); + + if (!replies[source].valid 
|| replies[source].op_ret != 0) + return _gf_false; - wind_subvols = alloca0 (priv->child_count); - for (i = 0; i < priv->child_count; i++) { - if (i == source || healed_sinks[i]) - wind_subvols[i] = 1; - } - - AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd, - offset, size, xdata); - if (xdata) - dict_unref (xdata); - - if (!replies[source].valid || replies[source].op_ret != 0) - return _gf_false; - - for (i = 0; i < priv->child_count; i++) { - if (i == source) - continue; - if (replies[i].valid) { - if (memcmp (replies[source].checksum, - replies[i].checksum, - replies[source].fips_mode_rchecksum ? - SHA256_DIGEST_LENGTH : MD5_DIGEST_LENGTH)) { - checksum_match = _gf_false; - break; - } - } - } - - if (checksum_match) { - if (HAS_HOLES (poststat)) - return _gf_true; - - /* For non-sparse files, we might be better off writing the - * zeroes to sinks to avoid mismatch of disk-usage in bricks. */ - if (local->replies[source].buf_has_zeroes) - return _gf_false; - else - return _gf_true; + for (i = 0; i < priv->child_count; i++) { + if (i == source) + continue; + if (replies[i].valid) { + if (memcmp(replies[source].checksum, replies[i].checksum, + replies[source].fips_mode_rchecksum + ? SHA256_DIGEST_LENGTH + : MD5_DIGEST_LENGTH)) { + checksum_match = _gf_false; + break; + } } + } + + if (checksum_match) { + if (HAS_HOLES(poststat)) + return _gf_true; + + /* For non-sparse files, we might be better off writing the + * zeroes to sinks to avoid mismatch of disk-usage in bricks. */ + if (local->replies[source].buf_has_zeroes) + return _gf_false; + else + return _gf_true; + } out: - return _gf_false; + return _gf_false; } - static gf_boolean_t -__afr_is_sink_zero_filled (xlator_t *this, fd_t *fd, size_t size, - off_t offset, int sink) +__afr_is_sink_zero_filled(xlator_t *this, fd_t *fd, size_t size, off_t offset, + int sink) { - afr_private_t *priv = NULL; - struct iobref *iobref = NULL; - struct iovec *iovec = NULL; - int count = 0; - int ret = 0; - gf_boolean_t zero_filled = _gf_false; - - priv = this->private; - ret = syncop_readv (priv->children[sink], fd, size, offset, 0, &iovec, - &count, &iobref, NULL, NULL, NULL); - if (ret < 0) - goto out; - ret = iov_0filled (iovec, count); - if (!ret) - zero_filled = _gf_true; + afr_private_t *priv = NULL; + struct iobref *iobref = NULL; + struct iovec *iovec = NULL; + int count = 0; + int ret = 0; + gf_boolean_t zero_filled = _gf_false; + + priv = this->private; + ret = syncop_readv(priv->children[sink], fd, size, offset, 0, &iovec, + &count, &iobref, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = iov_0filled(iovec, count); + if (!ret) + zero_filled = _gf_true; out: - if (iovec) - GF_FREE (iovec); - if (iobref) - iobref_unref (iobref); - return zero_filled; + if (iovec) + GF_FREE(iovec); + if (iobref) + iobref_unref(iobref); + return zero_filled; } static int -__afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd, - int source, unsigned char *healed_sinks, - off_t offset, size_t size, - struct afr_reply *replies, int type) +__afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd, + int source, unsigned char *healed_sinks, + off_t offset, size_t size, + struct afr_reply *replies, int type) { - struct iovec *iovec = NULL; - int count = 0; - struct iobref *iobref = NULL; - int ret = 0; - int i = 0; - afr_private_t *priv = NULL; - - priv = this->private; - - ret = syncop_readv (priv->children[source], fd, size, offset, 0, - &iovec, &count, &iobref, NULL, NULL, NULL); - if (ret <= 0) - 
return ret; - - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i]) - continue; - - /* - * TODO: Use fiemap() and discard() to heal holes - * in the future. - * - * For now, - * - * - if the source had any holes at all, - * AND - * - if we are writing past the original file size - * of the sink - * AND - * - is NOT the last block of the source file. if - * the block contains EOF, it has to be written - * in order to set the file size even if the - * last block is 0-filled. - * AND - * - if the read buffer is filled with only 0's - * - * then, skip writing to this source. We don't depend - * on the write to happen to update the size as we - * have performed an ftruncate() upfront anyways. - */ -#define is_last_block(o,b,s) ((s >= o) && (s <= (o + b))) - if (HAS_HOLES ((&replies[source].poststat)) && - offset >= replies[i].poststat.ia_size && - !is_last_block (offset, size, - replies[source].poststat.ia_size) && - (iov_0filled (iovec, count) == 0)) - continue; - - /* Avoid filling up sparse regions of the sink with 0-filled - * writes.*/ - if (type == AFR_SELFHEAL_DATA_FULL && - HAS_HOLES ((&replies[source].poststat)) && - ((offset + size) <= replies[i].poststat.ia_size) && - (iov_0filled (iovec, count) == 0) && - __afr_is_sink_zero_filled (this, fd, size, offset, i)) { - continue; - } - - ret = syncop_writev (priv->children[i], fd, iovec, count, - offset, iobref, 0, NULL, NULL, NULL, NULL); - if (ret != iov_length (iovec, count)) { - /* write() failed on this sink. unset the corresponding - member in sinks[] (which is healed_sinks[] in the - caller) so that this server does NOT get considered - as successfully healed. - */ - healed_sinks[i] = 0; - } - } - if (iovec) - GF_FREE (iovec); - if (iobref) - iobref_unref (iobref); - - return ret; + struct iovec *iovec = NULL; + int count = 0; + struct iobref *iobref = NULL; + int ret = 0; + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + + ret = syncop_readv(priv->children[source], fd, size, offset, 0, &iovec, + &count, &iobref, NULL, NULL, NULL); + if (ret <= 0) + return ret; + + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i]) + continue; + + /* + * TODO: Use fiemap() and discard() to heal holes + * in the future. + * + * For now, + * + * - if the source had any holes at all, + * AND + * - if we are writing past the original file size + * of the sink + * AND + * - is NOT the last block of the source file. if + * the block contains EOF, it has to be written + * in order to set the file size even if the + * last block is 0-filled. + * AND + * - if the read buffer is filled with only 0's + * + * then, skip writing to this source. We don't depend + * on the write to happen to update the size as we + * have performed an ftruncate() upfront anyways. 
+ */ +#define is_last_block(o, b, s) ((s >= o) && (s <= (o + b))) + if (HAS_HOLES((&replies[source].poststat)) && + offset >= replies[i].poststat.ia_size && + !is_last_block(offset, size, replies[source].poststat.ia_size) && + (iov_0filled(iovec, count) == 0)) + continue; + + /* Avoid filling up sparse regions of the sink with 0-filled + * writes.*/ + if (type == AFR_SELFHEAL_DATA_FULL && + HAS_HOLES((&replies[source].poststat)) && + ((offset + size) <= replies[i].poststat.ia_size) && + (iov_0filled(iovec, count) == 0) && + __afr_is_sink_zero_filled(this, fd, size, offset, i)) { + continue; + } + + ret = syncop_writev(priv->children[i], fd, iovec, count, offset, iobref, + 0, NULL, NULL, NULL, NULL); + if (ret != iov_length(iovec, count)) { + /* write() failed on this sink. unset the corresponding + member in sinks[] (which is healed_sinks[] in the + caller) so that this server does NOT get considered + as successfully healed. + */ + healed_sinks[i] = 0; + } + } + if (iovec) + GF_FREE(iovec); + if (iobref) + iobref_unref(iobref); + + return ret; } static int -afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd, - int source, unsigned char *healed_sinks, off_t offset, - size_t size, int type, struct afr_reply *replies) +afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd, + int source, unsigned char *healed_sinks, off_t offset, + size_t size, int type, struct afr_reply *replies) { - int ret = -1; - int sink_count = 0; - afr_private_t *priv = NULL; - unsigned char *data_lock = NULL; - - priv = this->private; - sink_count = AFR_COUNT (healed_sinks, priv->child_count); - data_lock = alloca0 (priv->child_count); - - ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, - offset, size, data_lock); - { - if (ret < sink_count) { - ret = -ENOTCONN; - goto unlock; - } - - if (type == AFR_SELFHEAL_DATA_DIFF && - __afr_can_skip_data_block_heal (frame, this, fd, source, - healed_sinks, offset, size, - &replies[source].poststat)) { - ret = 0; - goto unlock; - } - - ret = __afr_selfheal_data_read_write (frame, this, fd, source, - healed_sinks, offset, size, - replies, type); - } -unlock: - afr_selfheal_uninodelk (frame, this, fd->inode, this->name, - offset, size, data_lock); - return ret; -} + int ret = -1; + int sink_count = 0; + afr_private_t *priv = NULL; + unsigned char *data_lock = NULL; + + priv = this->private; + sink_count = AFR_COUNT(healed_sinks, priv->child_count); + data_lock = alloca0(priv->child_count); + + ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size, + data_lock); + { + if (ret < sink_count) { + ret = -ENOTCONN; + goto unlock; + } + if (type == AFR_SELFHEAL_DATA_DIFF && + __afr_can_skip_data_block_heal(frame, this, fd, source, + healed_sinks, offset, size, + &replies[source].poststat)) { + ret = 0; + goto unlock; + } + ret = __afr_selfheal_data_read_write( + frame, this, fd, source, healed_sinks, offset, size, replies, type); + } +unlock: + afr_selfheal_uninodelk(frame, this, fd->inode, this->name, offset, size, + data_lock); + return ret; +} static int -afr_selfheal_data_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, - unsigned char *healed_sinks) +afr_selfheal_data_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, + unsigned char *healed_sinks) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - - local = frame->local; - priv = this->private; - - if (!priv->ensure_durability) - return 0; - - AFR_ONLIST (healed_sinks, frame, afr_sh_generic_fop_cbk, fsync, fd, 0, - NULL); - - 
for (i = 0; i < priv->child_count; i++) - if (healed_sinks[i] && local->replies[i].op_ret != 0) - /* fsync() failed. Do NOT consider this server - as successfully healed. Mark it so. - */ - healed_sinks[i] = 0; - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + + local = frame->local; + priv = this->private; + + if (!priv->ensure_durability) + return 0; + + AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, fsync, fd, 0, NULL); + + for (i = 0; i < priv->child_count; i++) + if (healed_sinks[i] && local->replies[i].op_ret != 0) + /* fsync() failed. Do NOT consider this server + as successfully healed. Mark it so. + */ + healed_sinks[i] = 0; + return 0; } static int -afr_data_self_heal_type_get (afr_private_t *priv, unsigned char *healed_sinks, - int source, struct afr_reply *replies) +afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks, + int source, struct afr_reply *replies) { - int type = AFR_SELFHEAL_DATA_FULL; - int i = 0; - - if (priv->data_self_heal_algorithm == NULL) { - type = AFR_SELFHEAL_DATA_FULL; - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i] && i != source) - continue; - if (replies[i].poststat.ia_size) { - type = AFR_SELFHEAL_DATA_DIFF; - break; - } - } - } else if (strcmp (priv->data_self_heal_algorithm, "full") == 0) { - type = AFR_SELFHEAL_DATA_FULL; - } else if (strcmp (priv->data_self_heal_algorithm, "diff") == 0) { + int type = AFR_SELFHEAL_DATA_FULL; + int i = 0; + + if (priv->data_self_heal_algorithm == NULL) { + type = AFR_SELFHEAL_DATA_FULL; + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i] && i != source) + continue; + if (replies[i].poststat.ia_size) { type = AFR_SELFHEAL_DATA_DIFF; + break; + } } - return type; + } else if (strcmp(priv->data_self_heal_algorithm, "full") == 0) { + type = AFR_SELFHEAL_DATA_FULL; + } else if (strcmp(priv->data_self_heal_algorithm, "diff") == 0) { + type = AFR_SELFHEAL_DATA_DIFF; + } + return type; } static int -afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd, - int source, unsigned char *healed_sinks, - struct afr_reply *replies) +afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + unsigned char *healed_sinks, struct afr_reply *replies) { - afr_private_t *priv = NULL; - off_t off = 0; - size_t block = 0; - int type = AFR_SELFHEAL_DATA_FULL; - int ret = -1; - call_frame_t *iter_frame = NULL; - unsigned char arbiter_sink_status = 0; - - priv = this->private; - if (priv->arbiter_count) { - arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX]; - healed_sinks[ARBITER_BRICK_INDEX] = 0; + afr_private_t *priv = NULL; + off_t off = 0; + size_t block = 0; + int type = AFR_SELFHEAL_DATA_FULL; + int ret = -1; + call_frame_t *iter_frame = NULL; + unsigned char arbiter_sink_status = 0; + + priv = this->private; + if (priv->arbiter_count) { + arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX]; + healed_sinks[ARBITER_BRICK_INDEX] = 0; + } + + block = 128 * 1024 * priv->data_self_heal_window_size; + + type = afr_data_self_heal_type_get(priv, healed_sinks, source, replies); + + iter_frame = afr_copy_frame(frame); + if (!iter_frame) { + ret = -ENOMEM; + goto out; + } + + for (off = 0; off < replies[source].poststat.ia_size; off += block) { + if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { + ret = -ENOTCONN; + goto out; } - block = 128 * 1024 * priv->data_self_heal_window_size; - - type = afr_data_self_heal_type_get (priv, healed_sinks, source, - replies); + ret = 
afr_selfheal_data_block(iter_frame, this, fd, source, + healed_sinks, off, block, type, replies); + if (ret < 0) + goto out; - iter_frame = afr_copy_frame (frame); - if (!iter_frame) { - ret = -ENOMEM; - goto out; + AFR_STACK_RESET(iter_frame); + if (iter_frame->local == NULL) { + ret = -ENOTCONN; + goto out; } + } - for (off = 0; off < replies[source].poststat.ia_size; off += block) { - if (AFR_COUNT (healed_sinks, priv->child_count) == 0) { - ret = -ENOTCONN; - goto out; - } - - ret = afr_selfheal_data_block (iter_frame, this, fd, source, - healed_sinks, off, block, type, - replies); - if (ret < 0) - goto out; - - AFR_STACK_RESET (iter_frame); - if (iter_frame->local == NULL) { - ret = -ENOTCONN; - goto out; - } - } - - ret = afr_selfheal_data_fsync (frame, this, fd, healed_sinks); + ret = afr_selfheal_data_fsync(frame, this, fd, healed_sinks); out: - if (arbiter_sink_status) - healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status; + if (arbiter_sink_status) + healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status; - if (iter_frame) - AFR_STACK_DESTROY (iter_frame); - return ret; + if (iter_frame) + AFR_STACK_DESTROY(iter_frame); + return ret; } - static int -__afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this, - fd_t *fd, unsigned char *healed_sinks, - uint64_t size) +__afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd, + unsigned char *healed_sinks, uint64_t size) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - unsigned char arbiter_sink_status = 0; - int i = 0; - - local = frame->local; - priv = this->private; - - if (priv->arbiter_count) { - arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX]; - healed_sinks[ARBITER_BRICK_INDEX] = 0; - } - - AFR_ONLIST (healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, - size, NULL); - - for (i = 0; i < priv->child_count; i++) - if (healed_sinks[i] && local->replies[i].op_ret == -1) - /* truncate() failed. Do NOT consider this server - as successfully healed. Mark it so. - */ - healed_sinks[i] = 0; - - if (arbiter_sink_status) - healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status; - return 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + unsigned char arbiter_sink_status = 0; + int i = 0; + + local = frame->local; + priv = this->private; + + if (priv->arbiter_count) { + arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX]; + healed_sinks[ARBITER_BRICK_INDEX] = 0; + } + + AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, size, + NULL); + + for (i = 0; i < priv->child_count; i++) + if (healed_sinks[i] && local->replies[i].op_ret == -1) + /* truncate() failed. Do NOT consider this server + as successfully healed. Mark it so. 
+ */ + healed_sinks[i] = 0; + + if (arbiter_sink_status) + healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status; + return 0; } gf_boolean_t -afr_has_source_witnesses (xlator_t *this, unsigned char *sources, - uint64_t *witness) +afr_has_source_witnesses(xlator_t *this, unsigned char *sources, + uint64_t *witness) { - int i = 0; - afr_private_t *priv = NULL; + int i = 0; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (sources[i] && witness[i]) - return _gf_true; - } - return _gf_false; + for (i = 0; i < priv->child_count; i++) { + if (sources[i] && witness[i]) + return _gf_true; + } + return _gf_false; } static gf_boolean_t -afr_does_size_mismatch (xlator_t *this, unsigned char *sources, - struct afr_reply *replies) +afr_does_size_mismatch(xlator_t *this, unsigned char *sources, + struct afr_reply *replies) { - int i = 0; - afr_private_t *priv = NULL; - struct iatt *min = NULL; - struct iatt *max = NULL; + int i = 0; + afr_private_t *priv = NULL; + struct iatt *min = NULL; + struct iatt *max = NULL; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; - if (replies[i].op_ret < 0) - continue; + if (replies[i].op_ret < 0) + continue; - if (!sources[i]) - continue; + if (!sources[i]) + continue; - if (AFR_IS_ARBITER_BRICK (priv, i) && - (replies[i].poststat.ia_size == 0)) - continue; + if (AFR_IS_ARBITER_BRICK(priv, i) && (replies[i].poststat.ia_size == 0)) + continue; - if (!min) - min = &replies[i].poststat; + if (!min) + min = &replies[i].poststat; - if (!max) - max = &replies[i].poststat; + if (!max) + max = &replies[i].poststat; - if (min->ia_size > replies[i].poststat.ia_size) - min = &replies[i].poststat; + if (min->ia_size > replies[i].poststat.ia_size) + min = &replies[i].poststat; - if (max->ia_size < replies[i].poststat.ia_size) - max = &replies[i].poststat; - } + if (max->ia_size < replies[i].poststat.ia_size) + max = &replies[i].poststat; + } - if (min && max) { - if (min->ia_size != max->ia_size) - return _gf_true; - } + if (min && max) { + if (min->ia_size != max->ia_size) + return _gf_true; + } - return _gf_false; + return _gf_false; } static void -afr_mark_biggest_witness_as_source (xlator_t *this, unsigned char *sources, - uint64_t *witness) +afr_mark_biggest_witness_as_source(xlator_t *this, unsigned char *sources, + uint64_t *witness) { - int i = 0; - afr_private_t *priv = NULL; - uint64_t biggest_witness = 0; - - priv = this->private; - /* Find source with biggest witness count */ - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (biggest_witness < witness[i]) - biggest_witness = witness[i]; - } - - /* Mark files with less witness count as not source */ - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (witness[i] < biggest_witness) - sources[i] = 0; - } - - return; + int i = 0; + afr_private_t *priv = NULL; + uint64_t biggest_witness = 0; + + priv = this->private; + /* Find source with biggest witness count */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (biggest_witness < witness[i]) + biggest_witness = witness[i]; + } + + /* Mark files with less witness count as not source */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (witness[i] < biggest_witness) + sources[i] = 0; + } + + return; } /* This is a tie 
breaker function. Only one source be assigned here */ static void -afr_mark_newest_file_as_source (xlator_t *this, unsigned char *sources, - struct afr_reply *replies) +afr_mark_newest_file_as_source(xlator_t *this, unsigned char *sources, + struct afr_reply *replies) { - int i = 0; - afr_private_t *priv = NULL; - int source = -1; - uint32_t max_ctime = 0; - - priv = this->private; - /* Find source with latest ctime */ - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - - if (max_ctime <= replies[i].poststat.ia_ctime) { - source = i; - max_ctime = replies[i].poststat.ia_ctime; - } + int i = 0; + afr_private_t *priv = NULL; + int source = -1; + uint32_t max_ctime = 0; + + priv = this->private; + /* Find source with latest ctime */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + + if (max_ctime <= replies[i].poststat.ia_ctime) { + source = i; + max_ctime = replies[i].poststat.ia_ctime; } + } - /* Only mark one of the files as source to break ties */ - memset (sources, 0, sizeof (*sources) * priv->child_count); - sources[source] = 1; + /* Only mark one of the files as source to break ties */ + memset(sources, 0, sizeof(*sources) * priv->child_count); + sources[source] = 1; } static int -__afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, - inode_t *inode, - unsigned char *sources, - unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *locked_on, - unsigned char *undid_pending, - struct afr_reply *replies, - uint64_t *witness) +__afr_selfheal_data_finalize_source( + call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on, + unsigned char *undid_pending, struct afr_reply *replies, uint64_t *witness) { - afr_private_t *priv = NULL; - int source = -1; - int sources_count = 0; - priv = this->private; - - sources_count = AFR_COUNT (sources, priv->child_count); - - if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) - || !sources_count) { - /* split brain */ - source = afr_mark_split_brain_source_sinks (frame, this, inode, - sources, sinks, - healed_sinks, - locked_on, replies, - AFR_DATA_TRANSACTION); - if (source < 0) { - gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=data;" - "file=%s", this->name, uuid_utoa(inode->gfid)); - return -EIO; - } - - _afr_fav_child_reset_sink_xattrs (frame, this, inode, source, - healed_sinks, undid_pending, - AFR_DATA_TRANSACTION, - locked_on, replies); - goto out; - } - - /* No split brain at this point. If we were called from - * afr_heal_splitbrain_file(), abort.*/ - if (afr_dict_contains_heal_op(frame)) - return -EIO; - - /* If there are no witnesses/size-mismatches on sources we are done*/ - if (!afr_does_size_mismatch (this, sources, replies) && - !afr_has_source_witnesses (this, sources, witness)) - goto out; - - afr_mark_largest_file_as_source (this, sources, replies); - afr_mark_biggest_witness_as_source (this, sources, witness); - afr_mark_newest_file_as_source (this, sources, replies); - if (priv->arbiter_count) - /* Choose non-arbiter brick as source for empty files. 
*/ - afr_mark_source_sinks_if_file_empty (this, sources, sinks, - healed_sinks, locked_on, - replies, - AFR_DATA_TRANSACTION); + afr_private_t *priv = NULL; + int source = -1; + int sources_count = 0; + priv = this->private; + + sources_count = AFR_COUNT(sources, priv->child_count); + + if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) || + !sources_count) { + /* split brain */ + source = afr_mark_split_brain_source_sinks( + frame, this, inode, sources, sinks, healed_sinks, locked_on, + replies, AFR_DATA_TRANSACTION); + if (source < 0) { + gf_event(EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;type=data;" + "file=%s", + this->name, uuid_utoa(inode->gfid)); + return -EIO; + } + + _afr_fav_child_reset_sink_xattrs( + frame, this, inode, source, healed_sinks, undid_pending, + AFR_DATA_TRANSACTION, locked_on, replies); + goto out; + } + + /* No split brain at this point. If we were called from + * afr_heal_splitbrain_file(), abort.*/ + if (afr_dict_contains_heal_op(frame)) + return -EIO; + + /* If there are no witnesses/size-mismatches on sources we are done*/ + if (!afr_does_size_mismatch(this, sources, replies) && + !afr_has_source_witnesses(this, sources, witness)) + goto out; + + afr_mark_largest_file_as_source(this, sources, replies); + afr_mark_biggest_witness_as_source(this, sources, witness); + afr_mark_newest_file_as_source(this, sources, replies); + if (priv->arbiter_count) + /* Choose non-arbiter brick as source for empty files. */ + afr_mark_source_sinks_if_file_empty(this, sources, sinks, healed_sinks, + locked_on, replies, + AFR_DATA_TRANSACTION); out: - afr_mark_active_sinks (this, sources, locked_on, healed_sinks); - source = afr_choose_source_by_policy (priv, sources, - AFR_DATA_TRANSACTION); + afr_mark_active_sinks(this, sources, locked_on, healed_sinks); + source = afr_choose_source_by_policy(priv, sources, AFR_DATA_TRANSACTION); - return source; + return source; } /* @@ -619,296 +593,287 @@ out: * for self-healing, or -1 if no healing is necessary/split brain. */ int -__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, - inode_t *inode, unsigned char *locked_on, - unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *pflag) +__afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode, + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, + struct afr_reply *replies, gf_boolean_t *pflag) { - int ret = -1; - int source = -1; - afr_private_t *priv = NULL; - uint64_t *witness = NULL; - - priv = this->private; - - ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid, - replies); - - if (ret) - return ret; - - witness = alloca0(priv->child_count * sizeof (*witness)); - ret = afr_selfheal_find_direction (frame, this, replies, - AFR_DATA_TRANSACTION, - locked_on, sources, sinks, witness, - pflag); - if (ret) - return ret; - - /* Initialize the healed_sinks[] array optimistically to - the intersection of to-be-healed (i.e sinks[]) and - the list of servers which are up (i.e locked_on[]). - As we encounter failures in the healing process, we - will unmark the respective servers in the healed_sinks[] - array. 
- */ - AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); - - source = __afr_selfheal_data_finalize_source (frame, this, inode, - sources, sinks, - healed_sinks, - locked_on, undid_pending, - replies, witness); - if (source < 0) - return -EIO; - - return source; + int ret = -1; + int source = -1; + afr_private_t *priv = NULL; + uint64_t *witness = NULL; + + priv = this->private; + + ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies); + + if (ret) + return ret; + + witness = alloca0(priv->child_count * sizeof(*witness)); + ret = afr_selfheal_find_direction(frame, this, replies, + AFR_DATA_TRANSACTION, locked_on, sources, + sinks, witness, pflag); + if (ret) + return ret; + + /* Initialize the healed_sinks[] array optimistically to + the intersection of to-be-healed (i.e sinks[]) and + the list of servers which are up (i.e locked_on[]). + As we encounter failures in the healing process, we + will unmark the respective servers in the healed_sinks[] + array. + */ + AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count); + + source = __afr_selfheal_data_finalize_source( + frame, this, inode, sources, sinks, healed_sinks, locked_on, + undid_pending, replies, witness); + if (source < 0) + return -EIO; + + return source; } - static int -__afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, - unsigned char *locked_on) +__afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd, + unsigned char *locked_on) { - afr_private_t *priv = NULL; - int ret = -1; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *data_lock = NULL; - unsigned char *healed_sinks = NULL; - unsigned char *undid_pending = NULL; - struct afr_reply *locked_replies = NULL; - int source = -1; - gf_boolean_t did_sh = _gf_true; - gf_boolean_t is_arbiter_the_only_sink = _gf_false; - gf_boolean_t empty_file = _gf_false; - - priv = this->private; - - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - data_lock = alloca0 (priv->child_count); - undid_pending = alloca0 (priv->child_count); - - locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); - - ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, 0, 0, - data_lock); - { - if (ret < priv->child_count) { - gf_msg_debug (this->name, 0, "%s: Skipping " - "self-heal as only %d number " - "of subvolumes " - "could be locked", - uuid_utoa (fd->inode->gfid), - ret); - ret = -ENOTCONN; - goto unlock; - } - - ret = __afr_selfheal_data_prepare (frame, this, fd->inode, - data_lock, sources, sinks, - healed_sinks, undid_pending, - locked_replies, NULL); - if (ret < 0) - goto unlock; - - if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { - did_sh = _gf_false; - goto unlock; - } - - source = ret; - - if (AFR_IS_ARBITER_BRICK(priv, source)) { - empty_file = afr_is_file_empty_on_all_children (priv, - locked_replies); - if (empty_file) - goto restore_time; - - did_sh = _gf_false; - goto unlock; - } - - if (priv->arbiter_count && - AFR_COUNT (healed_sinks, priv->child_count) == 1 && - healed_sinks[ARBITER_BRICK_INDEX]) { - is_arbiter_the_only_sink = _gf_true; - goto restore_time; - } - - ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks, - locked_replies[source].poststat.ia_size); - if (ret < 0) - goto unlock; - - ret = 0; - - } -unlock: - afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0, - data_lock); + afr_private_t *priv = NULL; + int ret = -1; + unsigned char 
*sources = NULL; + unsigned char *sinks = NULL; + unsigned char *data_lock = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + int source = -1; + gf_boolean_t did_sh = _gf_true; + gf_boolean_t is_arbiter_the_only_sink = _gf_false; + gf_boolean_t empty_file = _gf_false; + + priv = this->private; + + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + data_lock = alloca0(priv->child_count); + undid_pending = alloca0(priv->child_count); + + locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); + + ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0, + data_lock); + { + if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "self-heal as only %d number " + "of subvolumes " + "could be locked", + uuid_utoa(fd->inode->gfid), ret); + ret = -ENOTCONN; + goto unlock; + } + + ret = __afr_selfheal_data_prepare(frame, this, fd->inode, data_lock, + sources, sinks, healed_sinks, + undid_pending, locked_replies, NULL); if (ret < 0) - goto out; + goto unlock; - if (!did_sh) - goto out; + if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { + did_sh = _gf_false; + goto unlock; + } - ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks, - locked_replies); - if (ret) - goto out; + source = ret; + + if (AFR_IS_ARBITER_BRICK(priv, source)) { + empty_file = afr_is_file_empty_on_all_children(priv, + locked_replies); + if (empty_file) + goto restore_time; + + did_sh = _gf_false; + goto unlock; + } + + if (priv->arbiter_count && + AFR_COUNT(healed_sinks, priv->child_count) == 1 && + healed_sinks[ARBITER_BRICK_INDEX]) { + is_arbiter_the_only_sink = _gf_true; + goto restore_time; + } + + ret = __afr_selfheal_truncate_sinks( + frame, this, fd, healed_sinks, + locked_replies[source].poststat.ia_size); + if (ret < 0) + goto unlock; + + ret = 0; + } +unlock: + afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock); + if (ret < 0) + goto out; + + if (!did_sh) + goto out; + + ret = afr_selfheal_data_do(frame, this, fd, source, healed_sinks, + locked_replies); + if (ret) + goto out; restore_time: - afr_selfheal_restore_time (frame, this, fd->inode, source, - healed_sinks, locked_replies); - - if (!is_arbiter_the_only_sink || !empty_file) { - ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, - 0, 0, data_lock); - if (ret < priv->child_count) { - ret = -ENOTCONN; - did_sh = _gf_false; - goto skip_undo_pending; - } + afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks, + locked_replies); + + if (!is_arbiter_the_only_sink || !empty_file) { + ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0, + data_lock); + if (ret < priv->child_count) { + ret = -ENOTCONN; + did_sh = _gf_false; + goto skip_undo_pending; } - ret = afr_selfheal_undo_pending (frame, this, fd->inode, - sources, sinks, healed_sinks, - undid_pending, AFR_DATA_TRANSACTION, - locked_replies, data_lock); + } + ret = afr_selfheal_undo_pending( + frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending, + AFR_DATA_TRANSACTION, locked_replies, data_lock); skip_undo_pending: - afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0, - data_lock); + afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock); out: - if (did_sh) - afr_log_selfheal (fd->inode->gfid, this, ret, "data", source, - sources, healed_sinks); - else - ret = 1; + if (did_sh) + 
afr_log_selfheal(fd->inode->gfid, this, ret, "data", source, sources, + healed_sinks); + else + ret = 1; - if (locked_replies) - afr_replies_wipe (locked_replies, priv->child_count); + if (locked_replies) + afr_replies_wipe(locked_replies, priv->child_count); - return ret; + return ret; } int -afr_selfheal_data_open_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - fd_t *fd, dict_t *xdata) +afr_selfheal_data_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + dict_t *xdata) { - afr_local_t *local = NULL; - int i = (long) cookie; + afr_local_t *local = NULL; + int i = (long)cookie; - local = frame->local; + local = frame->local; - local->replies[i].valid = 1; - local->replies[i].op_ret = op_ret; - local->replies[i].op_errno = op_errno; + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; - syncbarrier_wake (&local->barrier); + syncbarrier_wake(&local->barrier); - return 0; + return 0; } int -afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd) +afr_selfheal_data_open(xlator_t *this, inode_t *inode, fd_t **fd) { - int ret = 0; - fd_t *fd_tmp = NULL; - loc_t loc = {0,}; - call_frame_t *frame = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - - priv = this->private; - - fd_tmp = fd_create (inode, 0); - if (!fd_tmp) - return -ENOMEM; - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - frame = afr_frame_create (this, &ret); - if (!frame) { - ret = -ret; - fd_unref (fd_tmp); - goto out; + int ret = 0; + fd_t *fd_tmp = NULL; + loc_t loc = { + 0, + }; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + + priv = this->private; + + fd_tmp = fd_create(inode, 0); + if (!fd_tmp) + return -ENOMEM; + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + fd_unref(fd_tmp); + goto out; + } + local = frame->local; + + AFR_ONLIST(local->child_up, frame, afr_selfheal_data_open_cbk, open, &loc, + O_RDWR | O_LARGEFILE, fd_tmp, NULL); + + ret = -ENOTCONN; + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + + if (local->replies[i].op_ret < 0) { + ret = -local->replies[i].op_errno; + continue; } - local = frame->local; - - AFR_ONLIST (local->child_up, frame, afr_selfheal_data_open_cbk, open, - &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL); - - ret = -ENOTCONN; - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - if (local->replies[i].op_ret < 0) { - ret = -local->replies[i].op_errno; - continue; - } + ret = 0; + break; + } - ret = 0; - break; - } - - if (ret < 0) { - fd_unref (fd_tmp); - goto out; - } else { - fd_bind (fd_tmp); - } + if (ret < 0) { + fd_unref(fd_tmp); + goto out; + } else { + fd_bind(fd_tmp); + } - *fd = fd_tmp; + *fd = fd_tmp; out: - loc_wipe (&loc); - if (frame) - AFR_STACK_DESTROY (frame); - return ret; + loc_wipe(&loc); + if (frame) + AFR_STACK_DESTROY(frame); + return ret; } int -afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd) +afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd) { - afr_private_t *priv = NULL; - unsigned char *locked_on = NULL; - int ret = 0; - inode_t *inode = fd->inode; - - priv = this->private; - - locked_on = alloca0 (priv->child_count); - - ret = afr_selfheal_tie_breaker_inodelk (frame, this, inode, - 
priv->sh_domain, 0, 0, - locked_on); - { - if (ret < priv->child_count) { - gf_msg_debug (this->name, 0, "%s: Skipping " - "self-heal as only %d number of " - "subvolumes could be locked", - uuid_utoa (fd->inode->gfid), - ret); - /* Either less than two subvols available, or another - selfheal (from another server) is in progress. Skip - for now in any case there isn't anything to do. - */ - ret = -ENOTCONN; - goto unlock; - } - - ret = __afr_selfheal_data (frame, this, fd, locked_on); - } + afr_private_t *priv = NULL; + unsigned char *locked_on = NULL; + int ret = 0; + inode_t *inode = fd->inode; + + priv = this->private; + + locked_on = alloca0(priv->child_count); + + ret = afr_selfheal_tie_breaker_inodelk(frame, this, inode, priv->sh_domain, + 0, 0, locked_on); + { + if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "self-heal as only %d number of " + "subvolumes could be locked", + uuid_utoa(fd->inode->gfid), ret); + /* Either less than two subvols available, or another + selfheal (from another server) is in progress. Skip + for now in any case there isn't anything to do. + */ + ret = -ENOTCONN; + goto unlock; + } + + ret = __afr_selfheal_data(frame, this, fd, locked_on); + } unlock: - afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, - locked_on); + afr_selfheal_uninodelk(frame, this, inode, priv->sh_domain, 0, 0, + locked_on); - return ret; + return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index e6022cc939b..bf7a6b9d1e8 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "afr.h" #include "afr-self-heal.h" #include "byte-order.h" @@ -18,1105 +17,1090 @@ #include "events.h" static int -afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) { - afr_private_t *priv = NULL; - xlator_t *subvol = NULL; - int ret = 0; - loc_t loc = {0, }; - char g[64]; - - priv = this->private; - - subvol = priv->children[child]; - - loc.parent = inode_ref (dir); - gf_uuid_copy (loc.pargfid, dir->gfid); - loc.name = name; - loc.inode = inode_ref (inode); - - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", - uuid_utoa (dir->gfid), name, - uuid_utoa_r (replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir (subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", - uuid_utoa (dir->gfid), name, - uuid_utoa_r (replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink (subvol, &loc, NULL, NULL); - break; - } - } - - loc_wipe (&loc); - - return ret; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + char g[64]; + + priv = this->private; + + subvol = priv->children[child]; + + loc.parent = inode_ref(dir); + gf_uuid_copy(loc.pargfid, dir->gfid); + loc.name = name; + loc.inode = inode_ref(inode); + + if (replies[child].valid && replies[child].op_ret == 0) { + switch 
(replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + } + + loc_wipe(&loc); + + return ret; } int -afr_selfheal_recreate_entry (call_frame_t *frame, int dst, int source, - unsigned char *sources, inode_t *dir, - const char *name, inode_t *inode, - struct afr_reply *replies) +afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + unsigned char *sources, inode_t *dir, + const char *name, inode_t *inode, + struct afr_reply *replies) { - int ret = 0; - loc_t loc = {0,}; - loc_t srcloc = {0,}; - xlator_t *this = frame->this; - afr_private_t *priv = NULL; - dict_t *xdata = NULL; - struct iatt *iatt = NULL; - char *linkname = NULL; - mode_t mode = 0; - struct iatt newent = {0,}; - unsigned char *newentry = NULL; - - priv = this->private; - iatt = &replies[source].poststat; - if (iatt->ia_type == IA_INVAL || gf_uuid_is_null (iatt->ia_gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, - "Invalid ia_type (%d) or gfid(%s). source brick=%d, " - "pargfid=%s, name=%s", iatt->ia_type, - uuid_utoa(iatt->ia_gfid), source, - uuid_utoa(dir->gfid), name); - ret = -EINVAL; + int ret = 0; + loc_t loc = { + 0, + }; + loc_t srcloc = { + 0, + }; + xlator_t *this = frame->this; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + struct iatt *iatt = NULL; + char *linkname = NULL; + mode_t mode = 0; + struct iatt newent = { + 0, + }; + unsigned char *newentry = NULL; + + priv = this->private; + iatt = &replies[source].poststat; + if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, + "Invalid ia_type (%d) or gfid(%s). 
source brick=%d, " + "pargfid=%s, name=%s", + iatt->ia_type, uuid_utoa(iatt->ia_gfid), source, + uuid_utoa(dir->gfid), name); + ret = -EINVAL; + goto out; + } + + xdata = dict_new(); + if (!xdata) + return -ENOMEM; + newentry = alloca0(priv->child_count); + loc.parent = inode_ref(dir); + gf_uuid_copy(loc.pargfid, dir->gfid); + loc.name = name; + loc.inode = inode_ref(inode); + + ret = afr_selfheal_entry_delete(this, dir, name, inode, dst, replies); + if (ret) + goto out; + + ret = dict_set_gfuuid(xdata, "gfid-req", replies[source].poststat.ia_gfid, + true); + if (ret) + goto out; + + srcloc.inode = inode_ref(inode); + gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); + if (iatt->ia_type != IA_IFDIR) + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + newentry[dst] = 1; + ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, + sources, newentry); + if (ret) + goto out; + } + + mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); + + switch (iatt->ia_type) { + case IA_IFDIR: + ret = syncop_mkdir(priv->children[dst], &loc, mode, 0, xdata, NULL); + break; + case IA_IFLNK: + if (!newentry[dst]) { + ret = syncop_link(priv->children[dst], &srcloc, &loc, &newent, + NULL, NULL); + } else { + ret = syncop_readlink(priv->children[source], &srcloc, + &linkname, 4096, NULL, NULL); + if (ret <= 0) + goto out; + ret = syncop_symlink(priv->children[dst], &loc, linkname, NULL, + xdata, NULL); + } + break; + default: + ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); + if (ret) goto out; - } - - xdata = dict_new(); - if (!xdata) - return -ENOMEM; - newentry = alloca0 (priv->child_count); - loc.parent = inode_ref (dir); - gf_uuid_copy (loc.pargfid, dir->gfid); - loc.name = name; - loc.inode = inode_ref (inode); - - ret = afr_selfheal_entry_delete (this, dir, name, inode, dst, replies); - if (ret) - goto out; - - ret = dict_set_gfuuid (xdata, "gfid-req", - replies[source].poststat.ia_gfid, true); - if (ret) - goto out; - - srcloc.inode = inode_ref (inode); - gf_uuid_copy (srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup (priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { - newentry[dst] = 1; - ret = afr_selfheal_newentry_mark (frame, this, inode, source, - replies, sources, newentry); - if (ret) - goto out; - } - - mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type); - - switch (iatt->ia_type) { - case IA_IFDIR: - ret = syncop_mkdir (priv->children[dst], &loc, mode, 0, - xdata, NULL); - break; - case IA_IFLNK: - if (!newentry[dst]) { - ret = syncop_link (priv->children[dst], &srcloc, &loc, - &newent, NULL, NULL); - } else { - ret = syncop_readlink (priv->children[source], &srcloc, - &linkname, 4096, NULL, NULL); - if (ret <= 0) - goto out; - ret = syncop_symlink (priv->children[dst], &loc, - linkname, NULL, xdata, NULL); - } - break; - default: - ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); - if (ret) - goto out; - ret = syncop_mknod (priv->children[dst], &loc, mode, - makedev (ia_major(iatt->ia_rdev), ia_minor (iatt->ia_rdev)), - &newent, xdata, NULL); - break; - } + ret = syncop_mknod( + priv->children[dst], &loc, mode, + makedev(ia_major(iatt->ia_rdev), ia_minor(iatt->ia_rdev)), + &newent, xdata, NULL); + break; + } out: - if (xdata) - dict_unref (xdata); - GF_FREE (linkname); - loc_wipe (&loc); - loc_wipe (&srcloc); - return ret; + if (xdata) + dict_unref(xdata); + GF_FREE(linkname); + 
loc_wipe(&loc); + loc_wipe(&srcloc); + return ret; } static int -__afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, - char *name, inode_t *inode, int source, - unsigned char *sources, unsigned char *healed_sinks, - unsigned char *locked_on, struct afr_reply *replies) +__afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + char *name, inode_t *inode, int source, + unsigned char *sources, unsigned char *healed_sinks, + unsigned char *locked_on, struct afr_reply *replies) { - int ret = 0; - afr_private_t *priv = NULL; - int i = 0; + int ret = 0; + afr_private_t *priv = NULL; + int i = 0; - priv = this->private; + priv = this->private; - if (!replies[source].valid) - return -EIO; + if (!replies[source].valid) + return -EIO; - /* Skip healing this entry if the last lookup on it failed for reasons - * other than ENOENT. - */ - if ((replies[source].op_ret < 0) && - (replies[source].op_errno != ENOENT)) - return -replies[source].op_errno; - - if (replies[source].op_ret == 0) { - ret = afr_lookup_and_heal_gfid (this, fd->inode, name, - inode, replies, source, sources, - &replies[source].poststat.ia_gfid); - if (ret) - return ret; + /* Skip healing this entry if the last lookup on it failed for reasons + * other than ENOENT. + */ + if ((replies[source].op_ret < 0) && (replies[source].op_errno != ENOENT)) + return -replies[source].op_errno; + + if (replies[source].op_ret == 0) { + ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies, + source, sources, + &replies[source].poststat.ia_gfid); + if (ret) + return ret; + } + + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i]) + continue; + if (replies[source].op_ret == -1 && + replies[source].op_errno == ENOENT) { + ret = afr_selfheal_entry_delete(this, fd->inode, name, inode, i, + replies); + } else { + if (!gf_uuid_compare(replies[i].poststat.ia_gfid, + replies[source].poststat.ia_gfid)) + continue; + + ret = afr_selfheal_recreate_entry(frame, i, source, sources, + fd->inode, name, inode, replies); } + if (ret < 0) + break; + } - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i]) - continue; - if (replies[source].op_ret == -1 && - replies[source].op_errno == ENOENT) { - ret = afr_selfheal_entry_delete (this, fd->inode, name, - inode, i, replies); - } else { - if (!gf_uuid_compare (replies[i].poststat.ia_gfid, - replies[source].poststat.ia_gfid)) - continue; - - ret = afr_selfheal_recreate_entry (frame, i, source, - sources, fd->inode, - name, inode, - replies); - } - if (ret < 0) - break; - } - - return ret; + return ret; } static int -afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, - struct afr_reply *replies, - inode_t *inode, - uuid_t pargfid, - char *bname, int src_idx, - unsigned char *locked_on, - int *src) +afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, + struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, + char *bname, int src_idx, + unsigned char *locked_on, int *src) { - int i = 0; - int ret = -1; - afr_private_t *priv = NULL; - void *gfid = NULL; - ia_type_t ia_type = IA_INVAL; - - priv = this->private; - gfid = &replies[src_idx].poststat.ia_gfid; - ia_type = replies[src_idx].poststat.ia_type; + int i = 0; + int ret = -1; + afr_private_t *priv = NULL; + void *gfid = NULL; + ia_type_t ia_type = IA_INVAL; + + priv = this->private; + gfid = &replies[src_idx].poststat.ia_gfid; + ia_type = replies[src_idx].poststat.ia_type; + + for (i = 0; i < priv->child_count; i++) { + if (i == src_idx) + continue; + + if 
(!replies[i].valid) + continue; + + if (replies[i].op_ret != 0) + continue; + + if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) && + (ia_type == replies[i].poststat.ia_type)) { + ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, + bname, src_idx, i, locked_on, src, + NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Skipping conservative merge on the " + "file."); + return ret; + } - for (i = 0; i < priv->child_count; i++) { - if (i == src_idx) - continue; - - if (!replies[i].valid) - continue; - - if (replies[i].op_ret != 0) - continue; - - if (gf_uuid_compare (gfid, replies[i].poststat.ia_gfid) && - (ia_type == replies[i].poststat.ia_type)) { - ret = afr_gfid_split_brain_source (this, replies, inode, - pargfid, bname, - src_idx, i, - locked_on, src, - NULL); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, - "Skipping conservative merge on the " - "file."); - return ret; - } - - if (ia_type != replies[i].poststat.ia_type) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "Type mismatch detected " - "for /%s>, %s on %s and %s on %s. " - "Skipping conservative merge on the file.", - uuid_utoa (pargfid), bname, - gf_inode_type_to_str (replies[i].poststat.ia_type), - priv->children[i]->name, - gf_inode_type_to_str (replies[src_idx].poststat.ia_type), - priv->children[src_idx]->name); - gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=file;" - "file=/%s>;count=2;child-%d=%s;type-" - "%d=%s;child-%d=%s;type-%d=%s", - this->name, uuid_utoa (pargfid), bname, i, - priv->children[i]->name, i, - gf_inode_type_to_str(replies[i].poststat.ia_type), - src_idx, priv->children[src_idx]->name, src_idx, - gf_inode_type_to_str(replies[src_idx].poststat.ia_type)); - return -1; - } + if (ia_type != replies[i].poststat.ia_type) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Type mismatch detected " + "for /%s>, %s on %s and %s on %s. " + "Skipping conservative merge on the file.", + uuid_utoa(pargfid), bname, + gf_inode_type_to_str(replies[i].poststat.ia_type), + priv->children[i]->name, + gf_inode_type_to_str(replies[src_idx].poststat.ia_type), + priv->children[src_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;type=file;" + "file=/%s>;count=2;child-%d=%s;type-" + "%d=%s;child-%d=%s;type-%d=%s", + this->name, uuid_utoa(pargfid), bname, i, + priv->children[i]->name, i, + gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx, + priv->children[src_idx]->name, src_idx, + gf_inode_type_to_str(replies[src_idx].poststat.ia_type)); + return -1; } + } - return 0; + return 0; } static int -__afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, - char *name, inode_t *inode, unsigned char *sources, - unsigned char *healed_sinks, unsigned char *locked_on, - struct afr_reply *replies) +__afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + char *name, inode_t *inode, unsigned char *sources, + unsigned char *healed_sinks, + unsigned char *locked_on, struct afr_reply *replies) { - int ret = 0; - int i = 0; - int source = -1; - int src = -1; - afr_private_t *priv = NULL; + int ret = 0; + int i = 0; + int source = -1; + int src = -1; + afr_private_t *priv = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid && replies[i].op_ret == 0) { + source = i; + break; + } + } - priv = this->private; + if (source == -1) { + /* entry got deleted in the mean time? 
*/ + return 0; + } - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid && replies[i].op_ret == 0) { - source = i; - break; - } - } - - if (source == -1) { - /* entry got deleted in the mean time? */ - return 0; - } - - /* Set all the sources as 1, otheriwse newentry_mark won't be set */ - for (i = 0; i < priv->child_count; i++) { - if (replies[i].valid && replies[i].op_ret == 0) { - sources[i] = 1; - } - } - - ret = afr_lookup_and_heal_gfid (this, fd->inode, name, inode, replies, - source, sources, - &replies[source].poststat.ia_gfid); - if (ret) - return ret; + /* Set all the sources as 1, otheriwse newentry_mark won't be set */ + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid && replies[i].op_ret == 0) { + sources[i] = 1; + } + } - /* In case of type mismatch / unable to resolve gfid mismatch on the - * entry, return -1.*/ - ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies, inode, - fd->inode->gfid, - name, source, - locked_on, &src); + ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies, + source, sources, + &replies[source].poststat.ia_gfid); + if (ret) + return ret; - if (ret < 0) - return ret; - if (src != -1) { - source = src; - for (i = 0; i < priv->child_count; i++) { - if (i != src && replies[i].valid && - gf_uuid_compare (replies[src].poststat.ia_gfid, - replies[i].poststat.ia_gfid)) { - sources[i] = 0; - } - } + /* In case of type mismatch / unable to resolve gfid mismatch on the + * entry, return -1.*/ + ret = afr_selfheal_detect_gfid_and_type_mismatch( + this, replies, inode, fd->inode->gfid, name, source, locked_on, &src); + + if (ret < 0) + return ret; + if (src != -1) { + source = src; + for (i = 0; i < priv->child_count; i++) { + if (i != src && replies[i].valid && + gf_uuid_compare(replies[src].poststat.ia_gfid, + replies[i].poststat.ia_gfid)) { + sources[i] = 0; + } } + } - for (i = 0; i < priv->child_count; i++) { - if (i == source || !healed_sinks[i]) - continue; + for (i = 0; i < priv->child_count; i++) { + if (i == source || !healed_sinks[i]) + continue; - if (src != -1) { - if (!gf_uuid_compare (replies[src].poststat.ia_gfid, - replies[i].poststat.ia_gfid)) - continue; - } else if (replies[i].op_errno != ENOENT) { - continue; - } + if (src != -1) { + if (!gf_uuid_compare(replies[src].poststat.ia_gfid, + replies[i].poststat.ia_gfid)) + continue; + } else if (replies[i].op_errno != ENOENT) { + continue; + } - ret |= afr_selfheal_recreate_entry (frame, i, source, sources, - fd->inode, name, inode, - replies); - } + ret |= afr_selfheal_recreate_entry(frame, i, source, sources, fd->inode, + name, inode, replies); + } - return ret; + return ret; } - static int -__afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, - char *name, inode_t *inode, int source, - unsigned char *sources, unsigned char *healed_sinks, - unsigned char *locked_on, - struct afr_reply *replies) +__afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + char *name, inode_t *inode, int source, + unsigned char *sources, unsigned char *healed_sinks, + unsigned char *locked_on, struct afr_reply *replies) { - int ret = -1; - - if (source < 0) - ret = __afr_selfheal_merge_dirent (frame, this, fd, name, inode, - sources, healed_sinks, - locked_on, replies); - else - ret = __afr_selfheal_heal_dirent (frame, this, fd, name, inode, - source, sources, healed_sinks, - locked_on, replies); - return ret; + int ret = -1; + + if (source < 0) + ret = __afr_selfheal_merge_dirent(frame, this, fd, name, inode, 
sources, + healed_sinks, locked_on, replies); + else + ret = __afr_selfheal_heal_dirent(frame, this, fd, name, inode, source, + sources, healed_sinks, locked_on, + replies); + return ret; } static gf_boolean_t -is_full_heal_marker_present (xlator_t *this, dict_t *xdata, int idx) +is_full_heal_marker_present(xlator_t *this, dict_t *xdata, int idx) { - int i = 0; - int pending[3] = {0,}; - void *pending_raw = NULL; - afr_private_t *priv = NULL; + int i = 0; + int pending[3] = { + 0, + }; + void *pending_raw = NULL; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - if (!xdata) - return _gf_false; + if (!xdata) + return _gf_false; - /* Iterate over each of the priv->pending_keys[] elements and then - * see if any of them have data segment non-zero. If they do, return - * true. Else return false. - */ - for (i = 0; i < priv->child_count; i++) { - if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw)) - continue; + /* Iterate over each of the priv->pending_keys[] elements and then + * see if any of them have data segment non-zero. If they do, return + * true. Else return false. + */ + for (i = 0; i < priv->child_count; i++) { + if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw)) + continue; - if (!pending_raw) - continue; + if (!pending_raw) + continue; - memcpy (pending, pending_raw, sizeof (pending)); - if (ntoh32 (pending[idx])) - return _gf_true; - } + memcpy(pending, pending_raw, sizeof(pending)); + if (ntoh32(pending[idx])) + return _gf_true; + } - return _gf_false; + return _gf_false; } static gf_boolean_t -afr_need_full_heal (xlator_t *this, struct afr_reply *replies, int source, - unsigned char *healed_sinks, afr_transaction_type type) +afr_need_full_heal(xlator_t *this, struct afr_reply *replies, int source, + unsigned char *healed_sinks, afr_transaction_type type) { - int i = 0; - int idx = 0; - afr_private_t *priv = NULL; + int i = 0; + int idx = 0; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - if (!priv->esh_granular) - return _gf_true; + if (!priv->esh_granular) + return _gf_true; - if (type != AFR_ENTRY_TRANSACTION) - return _gf_true; + if (type != AFR_ENTRY_TRANSACTION) + return _gf_true; - priv = this->private; - idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); + priv = this->private; + idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION); - /* If there is a clear source, check whether the full-heal-indicator - * is present in its xdata. Otherwise, we need to examine all the - * participating bricks and then figure if *even* one of them has a - * full-heal-indicator. - */ + /* If there is a clear source, check whether the full-heal-indicator + * is present in its xdata. Otherwise, we need to examine all the + * participating bricks and then figure if *even* one of them has a + * full-heal-indicator. 
+ */ - if (source != -1) { - if (is_full_heal_marker_present (this, replies[source].xdata, - idx)) - return _gf_true; - } + if (source != -1) { + if (is_full_heal_marker_present(this, replies[source].xdata, idx)) + return _gf_true; + } - /* else ..*/ + /* else ..*/ - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i]) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i]) + continue; - if (is_full_heal_marker_present (this, replies[i].xdata, idx)) - return _gf_true; - } + if (is_full_heal_marker_present(this, replies[i].xdata, idx)) + return _gf_true; + } - return _gf_false; + return _gf_false; } static int -__afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources, - unsigned char *healed_sinks, - unsigned char *locked_on, - struct afr_reply *replies, - uint64_t *witness) +__afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + unsigned char *healed_sinks, + unsigned char *locked_on, + struct afr_reply *replies, + uint64_t *witness) { - afr_private_t *priv = NULL; - int source = -1; - int sources_count = 0; + afr_private_t *priv = NULL; + int source = -1; + int sources_count = 0; - priv = this->private; + priv = this->private; - sources_count = AFR_COUNT (sources, priv->child_count); + sources_count = AFR_COUNT(sources, priv->child_count); - if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) - || !sources_count || afr_does_witness_exist (this, witness)) { + if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) || + !sources_count || afr_does_witness_exist(this, witness)) { + memset(sources, 0, sizeof(*sources) * priv->child_count); + afr_mark_active_sinks(this, sources, locked_on, healed_sinks); + return -1; + } - memset (sources, 0, sizeof (*sources) * priv->child_count); - afr_mark_active_sinks (this, sources, locked_on, healed_sinks); - return -1; - } - - source = afr_choose_source_by_policy (priv, sources, - AFR_ENTRY_TRANSACTION); - return source; + source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION); + return source; } int -__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, - inode_t *inode, unsigned char *locked_on, - unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, - struct afr_reply *replies, int *source_p, - gf_boolean_t *pflag) +__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, + struct afr_reply *replies, int *source_p, + gf_boolean_t *pflag) { - int ret = -1; - int source = -1; - afr_private_t *priv = NULL; - uint64_t *witness = NULL; + int ret = -1; + int source = -1; + afr_private_t *priv = NULL; + uint64_t *witness = NULL; - priv = this->private; + priv = this->private; - ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid, - replies); - if (ret) - return ret; - - witness = alloca0 (sizeof (*witness) * priv->child_count); - ret = afr_selfheal_find_direction (frame, this, replies, - AFR_ENTRY_TRANSACTION, - locked_on, sources, sinks, witness, - pflag); - if (ret) - return ret; - - /* Initialize the healed_sinks[] array optimistically to - the intersection of to-be-healed (i.e sinks[]) and - the list of servers which are up (i.e locked_on[]). - - As we encounter failures in the healing process, we - will unmark the respective servers in the healed_sinks[] - array. 
- */ - AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); - - source = __afr_selfheal_entry_finalize_source (this, sources, - healed_sinks, - locked_on, replies, - witness); - - if (source < 0) { - /* If source is < 0 (typically split-brain), we perform a - conservative merge of entries rather than erroring out */ - } - *source_p = source; - - return ret; -} + ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies); + if (ret) + return ret; -static int -afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, - fd_t *fd, char *name, inode_t *parent_idx_inode, - xlator_t *subvol, gf_boolean_t full_crawl) -{ - int ret = 0; - int source = -1; - unsigned char *locked_on = NULL; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *healed_sinks = NULL; - inode_t *inode = NULL; - struct afr_reply *replies = NULL; - struct afr_reply *par_replies = NULL; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - - priv = this->private; - - xattr = dict_new (); - if (!xattr) - return -ENOMEM; - ret = dict_set_int32 (xattr, GF_GFIDLESS_LOOKUP, 1); - if (ret) { - dict_unref (xattr); - return -1; - } + witness = alloca0(sizeof(*witness) * priv->child_count); + ret = afr_selfheal_find_direction(frame, this, replies, + AFR_ENTRY_TRANSACTION, locked_on, sources, + sinks, witness, pflag); + if (ret) + return ret; - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - locked_on = alloca0 (priv->child_count); - - replies = alloca0 (priv->child_count * sizeof(*replies)); - par_replies = alloca0 (priv->child_count * sizeof(*par_replies)); - - ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL, - locked_on); - { - if (ret < AFR_SH_MIN_PARTICIPANTS) { - gf_msg_debug (this->name, 0, "%s: Skipping " - "entry self-heal as only %d sub-volumes " - " could be locked in %s domain", - uuid_utoa (fd->inode->gfid), - ret, this->name); - ret = -ENOTCONN; - goto unlock; - } - - ret = __afr_selfheal_entry_prepare (frame, this, fd->inode, - locked_on, - sources, sinks, - healed_sinks, par_replies, - &source, NULL); - if (ret < 0) - goto unlock; - - inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name, - replies, locked_on, - xattr); - if (!inode) { - ret = -ENOMEM; - goto unlock; - } - - ret = __afr_selfheal_entry_dirent (frame, this, fd, name, inode, - source, sources, healed_sinks, - locked_on, replies); - - if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge (subvol, parent_idx_inode, - name, inode->ia_type); - /* Why is ret force-set to 0? We do not care about - * index purge failing for full heal as it is quite - * possible during replace-brick that not all files - * and directories have their name indices present in - * entry-changes/. - */ - ret = 0; - } - } + /* Initialize the healed_sinks[] array optimistically to + the intersection of to-be-healed (i.e sinks[]) and + the list of servers which are up (i.e locked_on[]). -unlock: - afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL, - locked_on, NULL); - if (inode) - inode_unref (inode); - if (replies) - afr_replies_wipe (replies, priv->child_count); - if (par_replies) - afr_replies_wipe (par_replies, priv->child_count); - if (xattr) - dict_unref (xattr); - - return ret; -} + As we encounter failures in the healing process, we + will unmark the respective servers in the healed_sinks[] + array. 
+ */ + AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count); + source = __afr_selfheal_entry_finalize_source(this, sources, healed_sinks, + locked_on, replies, witness); -static inode_t * -afr_shd_entry_changes_index_inode (xlator_t *this, xlator_t *subvol, - uuid_t pargfid) + if (source < 0) { + /* If source is < 0 (typically split-brain), we perform a + conservative merge of entries rather than erroring out */ + } + *source_p = source; + + return ret; +} + +static int +afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + char *name, inode_t *parent_idx_inode, + xlator_t *subvol, gf_boolean_t full_crawl) { - int ret = -1; - void *index_gfid = NULL; - loc_t rootloc = {0,}; - loc_t loc = {0,}; - dict_t *xattr = NULL; - inode_t *inode = NULL; - struct iatt iatt = {0,}; - - rootloc.inode = inode_ref (this->itable->root); - gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid); - - ret = syncop_getxattr (subvol, &rootloc, &xattr, - GF_XATTROP_ENTRY_CHANGES_GFID, NULL, NULL); - if (ret || !xattr) { - errno = -ret; - goto out; + int ret = 0; + int source = -1; + unsigned char *locked_on = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + inode_t *inode = NULL; + struct afr_reply *replies = NULL; + struct afr_reply *par_replies = NULL; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + + priv = this->private; + + xattr = dict_new(); + if (!xattr) + return -ENOMEM; + ret = dict_set_int32(xattr, GF_GFIDLESS_LOOKUP, 1); + if (ret) { + dict_unref(xattr); + return -1; + } + + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + locked_on = alloca0(priv->child_count); + + replies = alloca0(priv->child_count * sizeof(*replies)); + par_replies = alloca0(priv->child_count * sizeof(*par_replies)); + + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes " + " could be locked in %s domain", + uuid_utoa(fd->inode->gfid), ret, this->name); + ret = -ENOTCONN; + goto unlock; } - ret = dict_get_ptr (xattr, GF_XATTROP_ENTRY_CHANGES_GFID, &index_gfid); - if (ret) { - errno = EINVAL; - goto out; - } + ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, locked_on, + sources, sinks, healed_sinks, + par_replies, &source, NULL); + if (ret < 0) + goto unlock; - loc.inode = inode_new (this->itable); - if (!loc.inode) { - errno = ENOMEM; - goto out; + inode = afr_selfheal_unlocked_lookup_on(frame, fd->inode, name, replies, + locked_on, xattr); + if (!inode) { + ret = -ENOMEM; + goto unlock; } - gf_uuid_copy (loc.pargfid, index_gfid); - loc.name = gf_strdup (uuid_utoa (pargfid)); - - ret = syncop_lookup (subvol, &loc, &iatt, NULL, NULL, NULL); - if (ret < 0) { - errno = -ret; - goto out; + ret = __afr_selfheal_entry_dirent(frame, this, fd, name, inode, source, + sources, healed_sinks, locked_on, + replies); + + if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { + ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + inode->ia_type); + /* Why is ret force-set to 0? We do not care about + * index purge failing for full heal as it is quite + * possible during replace-brick that not all files + * and directories have their name indices present in + * entry-changes/. 
+ */ + ret = 0; } + } - inode = inode_link (loc.inode, NULL, NULL, &iatt); +unlock: + afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, locked_on, + NULL); + if (inode) + inode_unref(inode); + if (replies) + afr_replies_wipe(replies, priv->child_count); + if (par_replies) + afr_replies_wipe(par_replies, priv->child_count); + if (xattr) + dict_unref(xattr); + + return ret; +} + +static inode_t * +afr_shd_entry_changes_index_inode(xlator_t *this, xlator_t *subvol, + uuid_t pargfid) +{ + int ret = -1; + void *index_gfid = NULL; + loc_t rootloc = { + 0, + }; + loc_t loc = { + 0, + }; + dict_t *xattr = NULL; + inode_t *inode = NULL; + struct iatt iatt = { + 0, + }; + + rootloc.inode = inode_ref(this->itable->root); + gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid); + + ret = syncop_getxattr(subvol, &rootloc, &xattr, + GF_XATTROP_ENTRY_CHANGES_GFID, NULL, NULL); + if (ret || !xattr) { + errno = -ret; + goto out; + } + + ret = dict_get_ptr(xattr, GF_XATTROP_ENTRY_CHANGES_GFID, &index_gfid); + if (ret) { + errno = EINVAL; + goto out; + } + + loc.inode = inode_new(this->itable); + if (!loc.inode) { + errno = ENOMEM; + goto out; + } + + gf_uuid_copy(loc.pargfid, index_gfid); + loc.name = gf_strdup(uuid_utoa(pargfid)); + + ret = syncop_lookup(subvol, &loc, &iatt, NULL, NULL, NULL); + if (ret < 0) { + errno = -ret; + goto out; + } + + inode = inode_link(loc.inode, NULL, NULL, &iatt); out: - if (xattr) - dict_unref (xattr); - loc_wipe (&rootloc); - GF_FREE ((char *)loc.name); - loc_wipe (&loc); + if (xattr) + dict_unref(xattr); + loc_wipe(&rootloc); + GF_FREE((char *)loc.name); + loc_wipe(&loc); - return inode; + return inode; } static int -afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, - fd_t *fd, int child) +afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, + int child) { - int ret = 0; - gf_dirent_t entries; - gf_dirent_t *entry = NULL; - off_t offset = 0; - call_frame_t *iter_frame = NULL; - xlator_t *subvol = NULL; - afr_private_t *priv = NULL; - gf_boolean_t mismatch = _gf_false; - afr_local_t *local = NULL; - loc_t loc = {0,}; - - priv = this->private; - subvol = priv->children[child]; - - INIT_LIST_HEAD (&entries.list); - - local = frame->local; - - iter_frame = afr_copy_frame (frame); - if (!iter_frame) - return -ENOMEM; - - loc.inode = afr_shd_entry_changes_index_inode (this, subvol, - fd->inode->gfid); - - while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries, - NULL, NULL))) { - if (ret > 0) - ret = 0; - list_for_each_entry (entry, &entries.list, list) { - offset = entry->d_off; - - if (!strcmp (entry->d_name, ".") || - !strcmp (entry->d_name, "..")) - continue; - - if (__is_root_gfid (fd->inode->gfid) && - !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - - ret = afr_selfheal_entry_dirent (iter_frame, this, fd, - entry->d_name, - loc.inode, subvol, - local->need_full_crawl); - AFR_STACK_RESET (iter_frame); - if (iter_frame->local == NULL) { - ret = -ENOTCONN; - break; - } - - if (ret == -1) { - /* gfid or type mismatch. 
*/ - mismatch = _gf_true; - ret = 0; - } - if (ret) - break; - } - - gf_dirent_free (&entries); - if (ret) - break; - } - - loc_wipe (&loc); - - AFR_STACK_DESTROY (iter_frame); - if (mismatch == _gf_true) - /* undo pending will be skipped */ - ret = -1; - return ret; -} + int ret = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + off_t offset = 0; + call_frame_t *iter_frame = NULL; + xlator_t *subvol = NULL; + afr_private_t *priv = NULL; + gf_boolean_t mismatch = _gf_false; + afr_local_t *local = NULL; + loc_t loc = { + 0, + }; + + priv = this->private; + subvol = priv->children[child]; + + INIT_LIST_HEAD(&entries.list); + + local = frame->local; + + iter_frame = afr_copy_frame(frame); + if (!iter_frame) + return -ENOMEM; + + loc.inode = afr_shd_entry_changes_index_inode(this, subvol, + fd->inode->gfid); + + while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, NULL, + NULL))) { + if (ret > 0) + ret = 0; + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; -static int -afr_selfheal_entry_granular_dirent (xlator_t *subvol, gf_dirent_t *entry, - loc_t *parent, void *data) -{ - int ret = 0; - loc_t loc = {0,}; - struct iatt iatt = {0,}; - afr_granular_esh_args_t *args = data; - - /* Look up the actual inode associated with entry. If the lookup returns - * ESTALE or ENOENT, then it means we have a stale index. Remove it. - * This is analogous to the check in afr_shd_index_heal() except that - * here it is achieved through LOOKUP and in afr_shd_index_heal() through - * a GETXATTR. - */ + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + + if (__is_root_gfid(fd->inode->gfid) && + !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) + continue; - loc.inode = inode_new (args->xl->itable); - loc.parent = inode_ref (args->heal_fd->inode); - gf_uuid_copy (loc.pargfid, loc.parent->gfid); - loc.name = entry->d_name; - - ret = syncop_lookup (args->xl, &loc, &iatt, NULL, NULL, NULL); - if ((ret == -ENOENT) || (ret == -ESTALE)) { - /* The name indices under the pgfid index dir are guaranteed - * to be regular files. Hence the hardcoding. - */ - afr_shd_index_purge (subvol, parent->inode, entry->d_name, - IA_IFREG); + ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, + loc.inode, subvol, + local->need_full_crawl); + AFR_STACK_RESET(iter_frame); + if (iter_frame->local == NULL) { + ret = -ENOTCONN; + break; + } + + if (ret == -1) { + /* gfid or type mismatch. */ + mismatch = _gf_true; ret = 0; - goto out; + } + if (ret) + break; } - /* TBD: afr_shd_zero_xattrop? 
*/ - ret = afr_selfheal_entry_dirent (args->frame, args->xl, args->heal_fd, - entry->d_name, parent->inode, subvol, - _gf_false); - AFR_STACK_RESET (args->frame); - if (args->frame->local == NULL) - ret = -ENOTCONN; + gf_dirent_free(&entries); + if (ret) + break; + } - if (ret == -1) - args->mismatch = _gf_true; + loc_wipe(&loc); -out: - loc_wipe (&loc); - return 0; + AFR_STACK_DESTROY(iter_frame); + if (mismatch == _gf_true) + /* undo pending will be skipped */ + ret = -1; + return ret; } static int -afr_selfheal_entry_granular (call_frame_t *frame, xlator_t *this, fd_t *fd, - int subvol_idx, gf_boolean_t is_src) +afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + loc_t *parent, void *data) { - int ret = 0; - loc_t loc = {0,}; - xlator_t *subvol = NULL; - afr_private_t *priv = NULL; - afr_granular_esh_args_t args = {0,}; - - priv = this->private; - subvol = priv->children[subvol_idx]; - - args.frame = afr_copy_frame (frame); - args.xl = this; - /* args.heal_fd represents the fd associated with the original directory - * on which entry heal is being attempted. + int ret = 0; + loc_t loc = { + 0, + }; + struct iatt iatt = { + 0, + }; + afr_granular_esh_args_t *args = data; + + /* Look up the actual inode associated with entry. If the lookup returns + * ESTALE or ENOENT, then it means we have a stale index. Remove it. + * This is analogous to the check in afr_shd_index_heal() except that + * here it is achieved through LOOKUP and in afr_shd_index_heal() through + * a GETXATTR. + */ + + loc.inode = inode_new(args->xl->itable); + loc.parent = inode_ref(args->heal_fd->inode); + gf_uuid_copy(loc.pargfid, loc.parent->gfid); + loc.name = entry->d_name; + + ret = syncop_lookup(args->xl, &loc, &iatt, NULL, NULL, NULL); + if ((ret == -ENOENT) || (ret == -ESTALE)) { + /* The name indices under the pgfid index dir are guaranteed + * to be regular files. Hence the hardcoding. */ - args.heal_fd = fd; + afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + ret = 0; + goto out; + } + /* TBD: afr_shd_zero_xattrop? */ + + ret = afr_selfheal_entry_dirent(args->frame, args->xl, args->heal_fd, + entry->d_name, parent->inode, subvol, + _gf_false); + AFR_STACK_RESET(args->frame); + if (args->frame->local == NULL) + ret = -ENOTCONN; + + if (ret == -1) + args->mismatch = _gf_true; - /* @subvol here represents the subvolume of AFR where - * indices/entry-changes/ will be processed +out: + loc_wipe(&loc); + return 0; +} + +static int +afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd, + int subvol_idx, gf_boolean_t is_src) +{ + int ret = 0; + loc_t loc = { + 0, + }; + xlator_t *subvol = NULL; + afr_private_t *priv = NULL; + afr_granular_esh_args_t args = { + 0, + }; + + priv = this->private; + subvol = priv->children[subvol_idx]; + + args.frame = afr_copy_frame(frame); + args.xl = this; + /* args.heal_fd represents the fd associated with the original directory + * on which entry heal is being attempted. + */ + args.heal_fd = fd; + + /* @subvol here represents the subvolume of AFR where + * indices/entry-changes/ will be processed + */ + loc.inode = afr_shd_entry_changes_index_inode(this, subvol, + fd->inode->gfid); + if (!loc.inode) { + /* If granular heal failed on the sink (as it might sometimes + * because it is the src that would mostly contain the granular + * changelogs and the sink's entry-changes would be empty), + * do not treat heal as failure. 
*/ - loc.inode = afr_shd_entry_changes_index_inode (this, subvol, - fd->inode->gfid); - if (!loc.inode) { - /* If granular heal failed on the sink (as it might sometimes - * because it is the src that would mostly contain the granular - * changelogs and the sink's entry-changes would be empty), - * do not treat heal as failure. - */ - if (is_src) - return -errno; - else - return 0; - } + if (is_src) + return -errno; + else + return 0; + } - ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_SELF_HEALD, - &args, afr_selfheal_entry_granular_dirent); + ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args, + afr_selfheal_entry_granular_dirent); - loc_wipe (&loc); + loc_wipe(&loc); - if (args.mismatch == _gf_true) - ret = -1; + if (args.mismatch == _gf_true) + ret = -1; - return ret; + return ret; } static int -afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd, - int source, unsigned char *sources, - unsigned char *healed_sinks) +afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + unsigned char *sources, unsigned char *healed_sinks) { - int i = 0; - int ret = 0; - gf_boolean_t mismatch = _gf_false; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - priv = this->private; - local = frame->local; - - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_SELF_HEAL_INFO, "performing entry selfheal on %s", - uuid_utoa (fd->inode->gfid)); - - for (i = 0; i < priv->child_count; i++) { - /* Expunge */ - if (!healed_sinks[i]) - continue; - - if (!local->need_full_crawl) - /* Why call afr_selfheal_entry_granular() on a "healed sink", - * given that it is the source that contains the granular - * indices? - * If the index for this directory is non-existent or empty on - * this subvol (=> clear sink), then it will return early - * without failure status. - * If the index is non-empty and it is yet a 'healed sink', then - * it is due to a split-brain in which case we anyway need to - * crawl the indices/entry-changes/pargfid directory. - */ - ret = afr_selfheal_entry_granular (frame, this, fd, i, - _gf_false); - else - ret = afr_selfheal_entry_do_subvol (frame, this, fd, i); - - if (ret == -1) { - /* gfid or type mismatch. */ - mismatch = _gf_true; - ret = 0; - } - if (ret) - break; - } - - if (!ret && source != -1) { - /* Impunge */ - if (local->need_full_crawl) - ret = afr_selfheal_entry_do_subvol (frame, this, fd, - source); - else - ret = afr_selfheal_entry_granular (frame, this, fd, - source, _gf_true); + int i = 0; + int ret = 0; + gf_boolean_t mismatch = _gf_false; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, + "performing entry selfheal on %s", uuid_utoa(fd->inode->gfid)); + + for (i = 0; i < priv->child_count; i++) { + /* Expunge */ + if (!healed_sinks[i]) + continue; + + if (!local->need_full_crawl) + /* Why call afr_selfheal_entry_granular() on a "healed sink", + * given that it is the source that contains the granular + * indices? + * If the index for this directory is non-existent or empty on + * this subvol (=> clear sink), then it will return early + * without failure status. + * If the index is non-empty and it is yet a 'healed sink', then + * it is due to a split-brain in which case we anyway need to + * crawl the indices/entry-changes/pargfid directory. 
+ */ + ret = afr_selfheal_entry_granular(frame, this, fd, i, _gf_false); + else + ret = afr_selfheal_entry_do_subvol(frame, this, fd, i); + + if (ret == -1) { + /* gfid or type mismatch. */ + mismatch = _gf_true; + ret = 0; } + if (ret) + break; + } - if (mismatch == _gf_true) - /* undo pending will be skipped */ - ret = -1; - return ret; + if (!ret && source != -1) { + /* Impunge */ + if (local->need_full_crawl) + ret = afr_selfheal_entry_do_subvol(frame, this, fd, source); + else + ret = afr_selfheal_entry_granular(frame, this, fd, source, + _gf_true); + } + + if (mismatch == _gf_true) + /* undo pending will be skipped */ + ret = -1; + return ret; } static int -__afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, - unsigned char *locked_on) +__afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, + unsigned char *locked_on) { - int ret = -1; - int source = -1; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *data_lock = NULL; - unsigned char *postop_lock = NULL; - unsigned char *healed_sinks = NULL; - unsigned char *undid_pending = NULL; - struct afr_reply *locked_replies = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - gf_boolean_t did_sh = _gf_true; - - priv = this->private; - local = frame->local; - - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - undid_pending = alloca0 (priv->child_count); - data_lock = alloca0 (priv->child_count); - postop_lock = alloca0 (priv->child_count); - - locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); - - ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL, - data_lock); - { - if (ret < AFR_SH_MIN_PARTICIPANTS) { - gf_msg_debug (this->name, 0, "%s: Skipping " - "entry self-heal as only %d sub-volumes could " - "be locked in %s domain", - uuid_utoa (fd->inode->gfid), ret, - this->name); - ret = -ENOTCONN; - goto unlock; - } - - ret = __afr_selfheal_entry_prepare (frame, this, fd->inode, - data_lock, sources, sinks, - healed_sinks, - locked_replies, &source, - NULL); - if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { - did_sh = _gf_false; - goto unlock; - } - - local->need_full_crawl = afr_need_full_heal (this, - locked_replies, - source, - healed_sinks, - AFR_ENTRY_TRANSACTION); - } -unlock: - afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL, - data_lock, NULL); - if (ret < 0) - goto out; + int ret = -1; + int source = -1; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *data_lock = NULL; + unsigned char *postop_lock = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + gf_boolean_t did_sh = _gf_true; + + priv = this->private; + local = frame->local; + + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + undid_pending = alloca0(priv->child_count); + data_lock = alloca0(priv->child_count); + postop_lock = alloca0(priv->child_count); + + locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); + + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + data_lock); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " + "be locked in %s domain", + uuid_utoa(fd->inode->gfid), ret, this->name); + 
ret = -ENOTCONN; + goto unlock; + } - if (!did_sh) - goto out; + ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, data_lock, + sources, sinks, healed_sinks, + locked_replies, &source, NULL); + if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { + did_sh = _gf_false; + goto unlock; + } - ret = afr_selfheal_entry_do (frame, this, fd, source, sources, - healed_sinks); - if (ret) - goto out; - - /* Take entrylks in xlator domain before doing post-op (undo-pending) in - * entry self-heal. This is to prevent a parallel name self-heal on - * an entry under @fd->inode from reading pending xattrs while it is - * being modified by SHD after entry sh below, given that - * name self-heal takes locks ONLY in xlator domain and is free to read - * pending changelog in the absence of the following locking. - */ - ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL, - postop_lock); - { - if (AFR_CMP (data_lock, postop_lock, priv->child_count) != 0) { - gf_msg_debug (this->name, 0, "%s: Skipping " - "post-op after entry self-heal as %d " - "sub-volumes, as opposed to %d, " - "could be locked in %s domain", - uuid_utoa (fd->inode->gfid), - ret, AFR_COUNT (data_lock, - priv->child_count), this->name); - ret = -ENOTCONN; - goto postop_unlock; - } - - ret = afr_selfheal_undo_pending (frame, this, fd->inode, - sources, sinks, healed_sinks, - undid_pending, - AFR_ENTRY_TRANSACTION, - locked_replies, postop_lock); + local->need_full_crawl = afr_need_full_heal( + this, locked_replies, source, healed_sinks, AFR_ENTRY_TRANSACTION); + } +unlock: + afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, data_lock, + NULL); + if (ret < 0) + goto out; + + if (!did_sh) + goto out; + + ret = afr_selfheal_entry_do(frame, this, fd, source, sources, healed_sinks); + if (ret) + goto out; + + /* Take entrylks in xlator domain before doing post-op (undo-pending) in + * entry self-heal. This is to prevent a parallel name self-heal on + * an entry under @fd->inode from reading pending xattrs while it is + * being modified by SHD after entry sh below, given that + * name self-heal takes locks ONLY in xlator domain and is free to read + * pending changelog in the absence of the following locking. 
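+     * The post-op lock below is taken in the same this->name domain, and the
+     * post-op proceeds only if exactly the bricks that were locked for the
+     * heal (data_lock[]) could be locked again (see the AFR_CMP check);
+     * otherwise it is skipped with -ENOTCONN.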
+ */ + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + postop_lock); + { + if (AFR_CMP(data_lock, postop_lock, priv->child_count) != 0) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "post-op after entry self-heal as %d " + "sub-volumes, as opposed to %d, " + "could be locked in %s domain", + uuid_utoa(fd->inode->gfid), ret, + AFR_COUNT(data_lock, priv->child_count), this->name); + ret = -ENOTCONN; + goto postop_unlock; } + + ret = afr_selfheal_undo_pending( + frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending, + AFR_ENTRY_TRANSACTION, locked_replies, postop_lock); + } postop_unlock: - afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL, - postop_lock, NULL); + afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, + postop_lock, NULL); out: - if (did_sh) - afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source, - sources, healed_sinks); - else - ret = 1; - - if (locked_replies) - afr_replies_wipe (locked_replies, priv->child_count); - return ret; + if (did_sh) + afr_log_selfheal(fd->inode->gfid, this, ret, "entry", source, sources, + healed_sinks); + else + ret = 1; + + if (locked_replies) + afr_replies_wipe(locked_replies, priv->child_count); + return ret; } - static fd_t * -afr_selfheal_data_opendir (xlator_t *this, inode_t *inode) +afr_selfheal_data_opendir(xlator_t *this, inode_t *inode) { - loc_t loc = {0,}; - int ret = 0; - fd_t *fd = NULL; - - fd = fd_create (inode, 0); - if (!fd) - return NULL; - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - ret = syncop_opendir (this, &loc, fd, NULL, NULL); - if (ret) { - fd_unref (fd); - fd = NULL; - } else { - fd_bind (fd); - } - - loc_wipe (&loc); - return fd; + loc_t loc = { + 0, + }; + int ret = 0; + fd_t *fd = NULL; + + fd = fd_create(inode, 0); + if (!fd) + return NULL; + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + ret = syncop_opendir(this, &loc, fd, NULL, NULL); + if (ret) { + fd_unref(fd); + fd = NULL; + } else { + fd_bind(fd); + } + + loc_wipe(&loc); + return fd; } - int -afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode) +afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) { - afr_private_t *priv = NULL; - unsigned char *locked_on = NULL; - fd_t *fd = NULL; - int ret = 0; - - priv = this->private; - - fd = afr_selfheal_data_opendir (this, inode); - if (!fd) - return -EIO; - - locked_on = alloca0 (priv->child_count); - - ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode, - priv->sh_domain, NULL, - locked_on); - { - if (ret < AFR_SH_MIN_PARTICIPANTS) { - gf_msg_debug (this->name, 0, "%s: Skipping " - "entry self-heal as only %d sub-volumes could " - "be locked in %s domain", - uuid_utoa (fd->inode->gfid), ret, - priv->sh_domain); - /* Either less than two subvols available, or another - selfheal (from another server) is in progress. Skip - for now in any case there isn't anything to do. 
- */ - ret = -ENOTCONN; - goto unlock; - } - - ret = __afr_selfheal_entry (frame, this, fd, locked_on); - } + afr_private_t *priv = NULL; + unsigned char *locked_on = NULL; + fd_t *fd = NULL; + int ret = 0; + + priv = this->private; + + fd = afr_selfheal_data_opendir(this, inode); + if (!fd) + return -EIO; + + locked_on = alloca0(priv->child_count); + + ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, + NULL, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " + "be locked in %s domain", + uuid_utoa(fd->inode->gfid), ret, priv->sh_domain); + /* Either less than two subvols available, or another + selfheal (from another server) is in progress. Skip + for now in any case there isn't anything to do. + */ + ret = -ENOTCONN; + goto unlock; + } + + ret = __afr_selfheal_entry(frame, this, fd, locked_on); + } unlock: - afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL, - locked_on, NULL); + afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL, locked_on, + NULL); - if (fd) - fd_unref (fd); + if (fd) + fd_unref(fd); - return ret; + return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 199f8961480..be6e574b6ca 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -8,109 +8,108 @@ cases as published by the Free Software Foundation. */ - #include "afr.h" #include "afr-self-heal.h" #include "byte-order.h" #include "protocol-common.h" #include "events.h" -#define AFR_HEAL_ATTR (GF_SET_ATTR_UID|GF_SET_ATTR_GID|GF_SET_ATTR_MODE) +#define AFR_HEAL_ATTR (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE) static gf_boolean_t -_afr_ignorable_key_match (dict_t *d, char *k, data_t *val, void *mdata) +_afr_ignorable_key_match(dict_t *d, char *k, data_t *val, void *mdata) { - return afr_is_xattr_ignorable (k); + return afr_is_xattr_ignorable(k); } void -afr_delete_ignorable_xattrs (dict_t *xattr) +afr_delete_ignorable_xattrs(dict_t *xattr) { - dict_foreach_match (xattr, _afr_ignorable_key_match, NULL, - dict_remove_foreach_fn, NULL); + dict_foreach_match(xattr, _afr_ignorable_key_match, NULL, + dict_remove_foreach_fn, NULL); } int -__afr_selfheal_metadata_do (call_frame_t *frame, xlator_t *this, inode_t *inode, - int source, unsigned char *healed_sinks, - struct afr_reply *locked_replies) +__afr_selfheal_metadata_do(call_frame_t *frame, xlator_t *this, inode_t *inode, + int source, unsigned char *healed_sinks, + struct afr_reply *locked_replies) { - int ret = -1; - loc_t loc = {0,}; - dict_t *xattr = NULL; - dict_t *old_xattr = NULL; - afr_private_t *priv = NULL; - int i = 0; - - priv = this->private; - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_SELF_HEAL_INFO, "performing metadata selfheal on %s", - uuid_utoa (inode->gfid)); - - ret = syncop_getxattr (priv->children[source], &loc, &xattr, NULL, - NULL, NULL); - if (ret < 0) { - ret = -EIO; - goto out; - } - - afr_delete_ignorable_xattrs (xattr); - - for (i = 0; i < priv->child_count; i++) { - if (old_xattr) { - dict_unref (old_xattr); - old_xattr = NULL; - } - - if (!healed_sinks[i]) - continue; - - ret = syncop_setattr (priv->children[i], &loc, - &locked_replies[source].poststat, - AFR_HEAL_ATTR, NULL, NULL, NULL, NULL); - if (ret) - healed_sinks[i] = 0; - - ret = syncop_getxattr 
(priv->children[i], &loc, &old_xattr, 0, - NULL, NULL); - if (old_xattr) { - afr_delete_ignorable_xattrs (old_xattr); - ret = syncop_removexattr (priv->children[i], &loc, "", - old_xattr, NULL); - if (ret) - healed_sinks[i] = 0; - } - - ret = syncop_setxattr (priv->children[i], &loc, xattr, 0, NULL, - NULL); - if (ret) - healed_sinks[i] = 0; - } - ret = 0; + int ret = -1; + loc_t loc = { + 0, + }; + dict_t *xattr = NULL; + dict_t *old_xattr = NULL; + afr_private_t *priv = NULL; + int i = 0; + + priv = this->private; + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, + "performing metadata selfheal on %s", uuid_utoa(inode->gfid)); + + ret = syncop_getxattr(priv->children[source], &loc, &xattr, NULL, NULL, + NULL); + if (ret < 0) { + ret = -EIO; + goto out; + } + + afr_delete_ignorable_xattrs(xattr); + + for (i = 0; i < priv->child_count; i++) { + if (old_xattr) { + dict_unref(old_xattr); + old_xattr = NULL; + } + + if (!healed_sinks[i]) + continue; + + ret = syncop_setattr(priv->children[i], &loc, + &locked_replies[source].poststat, AFR_HEAL_ATTR, + NULL, NULL, NULL, NULL); + if (ret) + healed_sinks[i] = 0; + + ret = syncop_getxattr(priv->children[i], &loc, &old_xattr, 0, NULL, + NULL); + if (old_xattr) { + afr_delete_ignorable_xattrs(old_xattr); + ret = syncop_removexattr(priv->children[i], &loc, "", old_xattr, + NULL); + if (ret) + healed_sinks[i] = 0; + } + + ret = syncop_setxattr(priv->children[i], &loc, xattr, 0, NULL, NULL); + if (ret) + healed_sinks[i] = 0; + } + ret = 0; out: - loc_wipe (&loc); - if (xattr) - dict_unref (xattr); - if (old_xattr) - dict_unref (old_xattr); + loc_wipe(&loc); + if (xattr) + dict_unref(xattr); + if (old_xattr) + dict_unref(old_xattr); - return ret; + return ret; } static uint64_t mtime_ns(struct iatt *ia) { - uint64_t ret; + uint64_t ret; - ret = (((uint64_t)(ia->ia_mtime)) * 1000000000) - + (uint64_t)(ia->ia_mtime_nsec); + ret = (((uint64_t)(ia->ia_mtime)) * 1000000000) + + (uint64_t)(ia->ia_mtime_nsec); - return ret; + return ret; } /* @@ -123,382 +122,367 @@ mtime_ns(struct iatt *ia) * the source with the most recent modification date. 
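 * For example, if one brick reports mtime 1500000000.200000000 and another
 * reports 1500000000.900000000, mtime_ns() yields 1500000000200000000 and
 * 1500000000900000000 respectively, so the latter brick is picked as source.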
*/ static int -afr_dirtime_splitbrain_source (call_frame_t *frame, xlator_t *this, - struct afr_reply *replies, - unsigned char *locked_on) +afr_dirtime_splitbrain_source(call_frame_t *frame, xlator_t *this, + struct afr_reply *replies, + unsigned char *locked_on) { - afr_private_t *priv = NULL; - int source = -1; - struct iatt source_ia; - struct iatt child_ia; - uint64_t mtime = 0; - int i; - int ret = -1; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (!locked_on[i]) - continue; - - if (!replies[i].valid) - continue; - - if (replies[i].op_ret != 0) - continue; - - if (mtime_ns(&replies[i].poststat) <= mtime) - continue; - - mtime = mtime_ns(&replies[i].poststat); - source = i; - } - - if (source == -1) - goto out; - - source_ia = replies[source].poststat; - if (source_ia.ia_type != IA_IFDIR) - goto out; - - for (i = 0; i < priv->child_count; i++) { - if (i == source) - continue; - - if (!replies[i].valid) - continue; - - if (replies[i].op_ret != 0) - continue; - - child_ia = replies[i].poststat; - - if (!IA_EQUAL(source_ia, child_ia, gfid) || - !IA_EQUAL(source_ia, child_ia, type) || - !IA_EQUAL(source_ia, child_ia, prot) || - !IA_EQUAL(source_ia, child_ia, uid) || - !IA_EQUAL(source_ia, child_ia, gid) || - !afr_xattrs_are_equal (replies[source].xdata, - replies[i].xdata)) - goto out; - } - - /* - * Metadata split brain is just about [amc]time - * We return our source. - */ - ret = source; + afr_private_t *priv = NULL; + int source = -1; + struct iatt source_ia; + struct iatt child_ia; + uint64_t mtime = 0; + int i; + int ret = -1; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (!locked_on[i]) + continue; + + if (!replies[i].valid) + continue; + + if (replies[i].op_ret != 0) + continue; + + if (mtime_ns(&replies[i].poststat) <= mtime) + continue; + + mtime = mtime_ns(&replies[i].poststat); + source = i; + } + + if (source == -1) + goto out; + + source_ia = replies[source].poststat; + if (source_ia.ia_type != IA_IFDIR) + goto out; + + for (i = 0; i < priv->child_count; i++) { + if (i == source) + continue; + + if (!replies[i].valid) + continue; + + if (replies[i].op_ret != 0) + continue; + + child_ia = replies[i].poststat; + + if (!IA_EQUAL(source_ia, child_ia, gfid) || + !IA_EQUAL(source_ia, child_ia, type) || + !IA_EQUAL(source_ia, child_ia, prot) || + !IA_EQUAL(source_ia, child_ia, uid) || + !IA_EQUAL(source_ia, child_ia, gid) || + !afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata)) + goto out; + } + + /* + * Metadata split brain is just about [amc]time + * We return our source. + */ + ret = source; out: - return ret; + return ret; } - /* * Look for mismatching uid/gid or mode or user xattrs even if * AFR xattrs don't say so, and pick one arbitrarily as winner. 
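 * Every other source is then compared against the winner: a brick whose
 * type/uid/gid/mode or xattrs differ from the winner's is demoted from
 * sources[] to healed_sinks[] so that it gets overwritten by the heal.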
*/ static int -__afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, - inode_t *inode, - unsigned char *sources, - unsigned char *sinks, - unsigned char *healed_sinks, - unsigned char *undid_pending, - unsigned char *locked_on, - struct afr_reply *replies) +__afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *sources, + unsigned char *sinks, + unsigned char *healed_sinks, + unsigned char *undid_pending, + unsigned char *locked_on, + struct afr_reply *replies) { - int i = 0; - afr_private_t *priv = NULL; - struct iatt srcstat = {0, }; - int source = -1; - int sources_count = 0; - - priv = this->private; - - sources_count = AFR_COUNT (sources, priv->child_count); - - if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) - || !sources_count) { - - source = afr_mark_split_brain_source_sinks (frame, this, inode, - sources, sinks, - healed_sinks, - locked_on, replies, - AFR_METADATA_TRANSACTION); - if (source >= 0) { - _afr_fav_child_reset_sink_xattrs (frame, this, inode, - source, healed_sinks, - undid_pending, - AFR_METADATA_TRANSACTION, - locked_on, replies); - goto out; - } - - /* If this is a directory mtime/ctime only split brain - use the most recent */ - source = afr_dirtime_splitbrain_source (frame, this, - replies, locked_on); - if (source != -1) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_SPLIT_BRAIN, "clear time " - "split brain on %s", - uuid_utoa (replies[source].poststat.ia_gfid)); - sources[source] = 1; - healed_sinks[source] = 0; - goto out; - } - - if (!priv->metadata_splitbrain_forced_heal) { - gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;" - "type=metadata;file=%s", - this->name, uuid_utoa(inode->gfid)); - return -EIO; - } - - /* Metadata split brain, select one subvol - arbitrarily */ - for (i = 0; i < priv->child_count; i++) { - if (locked_on[i] && healed_sinks[i]) { - sources[i] = 1; - healed_sinks[i] = 0; - break; - } - } - } - - /* No split brain at this point. 
If we were called from - * afr_heal_splitbrain_file(), abort.*/ - if (afr_dict_contains_heal_op(frame)) - return -EIO; - - source = afr_choose_source_by_policy (priv, sources, - AFR_METADATA_TRANSACTION); - srcstat = replies[source].poststat; - - for (i = 0; i < priv->child_count; i++) { - if (!sources[i] || i == source) - continue; - if (!IA_EQUAL (srcstat, replies[i].poststat, type) || - !IA_EQUAL (srcstat, replies[i].poststat, uid) || - !IA_EQUAL (srcstat, replies[i].poststat, gid) || - !IA_EQUAL (srcstat, replies[i].poststat, prot)) { - gf_msg_debug (this->name, 0, "%s: iatt mismatch " - "for source(%d) vs (%d)", - uuid_utoa - (replies[source].poststat.ia_gfid), - source, i); - sources[i] = 0; - healed_sinks[i] = 1; - } - } - - for (i =0; i < priv->child_count; i++) { - if (!sources[i] || i == source) - continue; - if (!afr_xattrs_are_equal (replies[source].xdata, - replies[i].xdata)) { - gf_msg_debug (this->name, 0, "%s: xattr mismatch " - "for source(%d) vs (%d)", - uuid_utoa - (replies[source].poststat.ia_gfid), - source, i); - sources[i] = 0; - healed_sinks[i] = 1; - } + int i = 0; + afr_private_t *priv = NULL; + struct iatt srcstat = { + 0, + }; + int source = -1; + int sources_count = 0; + + priv = this->private; + + sources_count = AFR_COUNT(sources, priv->child_count); + + if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) || + !sources_count) { + source = afr_mark_split_brain_source_sinks( + frame, this, inode, sources, sinks, healed_sinks, locked_on, + replies, AFR_METADATA_TRANSACTION); + if (source >= 0) { + _afr_fav_child_reset_sink_xattrs( + frame, this, inode, source, healed_sinks, undid_pending, + AFR_METADATA_TRANSACTION, locked_on, replies); + goto out; + } + + /* If this is a directory mtime/ctime only split brain + use the most recent */ + source = afr_dirtime_splitbrain_source(frame, this, replies, locked_on); + if (source != -1) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SPLIT_BRAIN, + "clear time " + "split brain on %s", + uuid_utoa(replies[source].poststat.ia_gfid)); + sources[source] = 1; + healed_sinks[source] = 0; + goto out; } + if (!priv->metadata_splitbrain_forced_heal) { + gf_event(EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;" + "type=metadata;file=%s", + this->name, uuid_utoa(inode->gfid)); + return -EIO; + } + + /* Metadata split brain, select one subvol + arbitrarily */ + for (i = 0; i < priv->child_count; i++) { + if (locked_on[i] && healed_sinks[i]) { + sources[i] = 1; + healed_sinks[i] = 0; + break; + } + } + } + + /* No split brain at this point. 
If we were called from + * afr_heal_splitbrain_file(), abort.*/ + if (afr_dict_contains_heal_op(frame)) + return -EIO; + + source = afr_choose_source_by_policy(priv, sources, + AFR_METADATA_TRANSACTION); + srcstat = replies[source].poststat; + + for (i = 0; i < priv->child_count; i++) { + if (!sources[i] || i == source) + continue; + if (!IA_EQUAL(srcstat, replies[i].poststat, type) || + !IA_EQUAL(srcstat, replies[i].poststat, uid) || + !IA_EQUAL(srcstat, replies[i].poststat, gid) || + !IA_EQUAL(srcstat, replies[i].poststat, prot)) { + gf_msg_debug(this->name, 0, + "%s: iatt mismatch " + "for source(%d) vs (%d)", + uuid_utoa(replies[source].poststat.ia_gfid), source, + i); + sources[i] = 0; + healed_sinks[i] = 1; + } + } + + for (i = 0; i < priv->child_count; i++) { + if (!sources[i] || i == source) + continue; + if (!afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata)) { + gf_msg_debug(this->name, 0, + "%s: xattr mismatch " + "for source(%d) vs (%d)", + uuid_utoa(replies[source].poststat.ia_gfid), source, + i); + sources[i] = 0; + healed_sinks[i] = 1; + } + } + out: - afr_mark_active_sinks (this, sources, locked_on, healed_sinks); - return source; + afr_mark_active_sinks(this, sources, locked_on, healed_sinks); + return source; } - int -__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, - unsigned char *locked_on, unsigned char *sources, - unsigned char *sinks, unsigned char *healed_sinks, - unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *pflag) +__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, + unsigned char *undid_pending, + struct afr_reply *replies, gf_boolean_t *pflag) { - int ret = -1; - int source = -1; - afr_private_t *priv = NULL; - int i = 0; - uint64_t *witness = NULL; + int ret = -1; + int source = -1; + afr_private_t *priv = NULL; + int i = 0; + uint64_t *witness = NULL; - priv = this->private; + priv = this->private; - ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid, - replies); - if (ret) - return ret; - - witness = alloca0 (sizeof (*witness) * priv->child_count); - ret = afr_selfheal_find_direction (frame, this, replies, - AFR_METADATA_TRANSACTION, - locked_on, sources, sinks, witness, - pflag); - if (ret) - return ret; - - /* Initialize the healed_sinks[] array optimistically to - the intersection of to-be-healed (i.e sinks[]) and - the list of servers which are up (i.e locked_on[]). - - As we encounter failures in the healing process, we - will unmark the respective servers in the healed_sinks[] - array. 
- */ - AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); - - /* If any source has witness, pick first - * witness source and make everybody else sinks */ - for (i = 0; i < priv->child_count; i++) { - if (sources[i] && witness[i]) { - source = i; - break; - } - } + ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies); + if (ret) + return ret; - if (source != -1) { - for (i = 0; i < priv->child_count; i++) { - if (i != source && sources[i]) { - sources[i] = 0; - healed_sinks[i] = 1; - } - } + witness = alloca0(sizeof(*witness) * priv->child_count); + ret = afr_selfheal_find_direction(frame, this, replies, + AFR_METADATA_TRANSACTION, locked_on, + sources, sinks, witness, pflag); + if (ret) + return ret; + + /* Initialize the healed_sinks[] array optimistically to + the intersection of to-be-healed (i.e sinks[]) and + the list of servers which are up (i.e locked_on[]). + + As we encounter failures in the healing process, we + will unmark the respective servers in the healed_sinks[] + array. + */ + AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count); + + /* If any source has witness, pick first + * witness source and make everybody else sinks */ + for (i = 0; i < priv->child_count; i++) { + if (sources[i] && witness[i]) { + source = i; + break; } + } - source = __afr_selfheal_metadata_finalize_source (frame, this, inode, - sources, sinks, - healed_sinks, - undid_pending, - locked_on, replies); + if (source != -1) { + for (i = 0; i < priv->child_count; i++) { + if (i != source && sources[i]) { + sources[i] = 0; + healed_sinks[i] = 1; + } + } + } - if (source < 0) - return -EIO; + source = __afr_selfheal_metadata_finalize_source( + frame, this, inode, sources, sinks, healed_sinks, undid_pending, + locked_on, replies); - return source; -} + if (source < 0) + return -EIO; -int -afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) -{ - afr_private_t *priv = NULL; - int ret = -1; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *data_lock = NULL; - unsigned char *healed_sinks = NULL; - unsigned char *undid_pending = NULL; - struct afr_reply *locked_replies = NULL; - gf_boolean_t did_sh = _gf_true; - int source = -1; - - priv = this->private; - - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - undid_pending = alloca0 (priv->child_count); - data_lock = alloca0 (priv->child_count); - - locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); - - ret = afr_selfheal_inodelk (frame, this, inode, this->name, - LLONG_MAX - 1, 0, data_lock); - { - if (ret < AFR_SH_MIN_PARTICIPANTS) { - ret = -ENOTCONN; - goto unlock; - } - - ret = __afr_selfheal_metadata_prepare (frame, this, inode, - data_lock, sources, - sinks, healed_sinks, - undid_pending, - locked_replies, NULL); - if (ret < 0) - goto unlock; - - source = ret; - - if (AFR_COUNT (healed_sinks, priv->child_count) == 0) { - did_sh = _gf_false; - goto unlock; - } - - ret = __afr_selfheal_metadata_do (frame, this, inode, source, - healed_sinks, locked_replies); - if (ret) - goto unlock; - - /* Restore atime/mtime for files that don't need data heal as - * restoring timestamps happens only as a part of data-heal. 
- */ - if (!IA_ISREG (locked_replies[source].poststat.ia_type)) - afr_selfheal_restore_time (frame, this, inode, source, - healed_sinks, locked_replies); - - ret = afr_selfheal_undo_pending (frame, this, inode, sources, - sinks, healed_sinks, - undid_pending, - AFR_METADATA_TRANSACTION, - locked_replies, data_lock); - } -unlock: - afr_selfheal_uninodelk (frame, this, inode, this->name, - LLONG_MAX -1, 0, data_lock); - - if (did_sh) - afr_log_selfheal (inode->gfid, this, ret, "metadata", source, - sources, healed_sinks); - else - ret = 1; - - if (locked_replies) - afr_replies_wipe (locked_replies, priv->child_count); - return ret; + return source; } int -afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf) +afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode) { - inode_t *inode = NULL; - inode_t *link_inode = NULL; - call_frame_t *frame = NULL; - int ret = 0; - - if (gf_uuid_is_null (stbuf->ia_gfid)) { - ret = -EINVAL; - goto out; + afr_private_t *priv = NULL; + int ret = -1; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *data_lock = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + gf_boolean_t did_sh = _gf_true; + int source = -1; + + priv = this->private; + + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + undid_pending = alloca0(priv->child_count); + data_lock = alloca0(priv->child_count); + + locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); + + ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, + data_lock); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) { + ret = -ENOTCONN; + goto unlock; } - inode = inode_new (this->itable); - if (!inode) { - ret = -ENOMEM; - goto out; - } + ret = __afr_selfheal_metadata_prepare( + frame, this, inode, data_lock, sources, sinks, healed_sinks, + undid_pending, locked_replies, NULL); + if (ret < 0) + goto unlock; - link_inode = inode_link (inode, NULL, NULL, stbuf); - if (!link_inode) { - ret = -ENOMEM; - goto out; - } + source = ret; - frame = afr_frame_create (this, &ret); - if (!frame) { - ret = -ret; - goto out; + if (AFR_COUNT(healed_sinks, priv->child_count) == 0) { + did_sh = _gf_false; + goto unlock; } - ret = afr_selfheal_metadata (frame, this, link_inode); + ret = __afr_selfheal_metadata_do(frame, this, inode, source, + healed_sinks, locked_replies); + if (ret) + goto unlock; + + /* Restore atime/mtime for files that don't need data heal as + * restoring timestamps happens only as a part of data-heal. 
+ */ + if (!IA_ISREG(locked_replies[source].poststat.ia_type)) + afr_selfheal_restore_time(frame, this, inode, source, healed_sinks, + locked_replies); + + ret = afr_selfheal_undo_pending( + frame, this, inode, sources, sinks, healed_sinks, undid_pending, + AFR_METADATA_TRANSACTION, locked_replies, data_lock); + } +unlock: + afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, + data_lock); + + if (did_sh) + afr_log_selfheal(inode->gfid, this, ret, "metadata", source, sources, + healed_sinks); + else + ret = 1; + + if (locked_replies) + afr_replies_wipe(locked_replies, priv->child_count); + return ret; +} + +int +afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf) +{ + inode_t *inode = NULL; + inode_t *link_inode = NULL; + call_frame_t *frame = NULL; + int ret = 0; + + if (gf_uuid_is_null(stbuf->ia_gfid)) { + ret = -EINVAL; + goto out; + } + + inode = inode_new(this->itable); + if (!inode) { + ret = -ENOMEM; + goto out; + } + + link_inode = inode_link(inode, NULL, NULL, stbuf); + if (!link_inode) { + ret = -ENOMEM; + goto out; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + ret = afr_selfheal_metadata(frame, this, link_inode); out: - if (inode) - inode_unref (inode); - if (link_inode) - inode_unref (link_inode); - if (frame) - AFR_STACK_DESTROY (frame); - return ret; + if (inode) + inode_unref(inode); + if (link_inode) + inode_unref(link_inode); + if (frame) + AFR_STACK_DESTROY(frame); + return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 0a5be29d5ee..39aacee6ecf 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -8,696 +8,667 @@ cases as published by the Free Software Foundation. */ - #include "events.h" #include "afr.h" #include "afr-self-heal.h" #include "afr-messages.h" int -__afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, - const char *bname, inode_t *inode, - struct afr_reply *replies, void *gfid, - unsigned char *locked_on, int source, - unsigned char *sources, gf_boolean_t is_gfid_absent) +__afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies, void *gfid, + unsigned char *locked_on, int source, + unsigned char *sources, gf_boolean_t is_gfid_absent) { - int ret = 0; - int up_count = 0; - int locked_count = 0; - afr_private_t *priv = NULL; + int ret = 0; + int up_count = 0; + int locked_count = 0; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - gf_uuid_copy (parent->gfid, pargfid); + gf_uuid_copy(parent->gfid, pargfid); - if (is_gfid_absent) { - /* Ensure all children of AFR are up before performing gfid heal, to - * guard against the possibility of gfid split brain. */ + if (is_gfid_absent) { + /* Ensure all children of AFR are up before performing gfid heal, to + * guard against the possibility of gfid split brain. 
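+         * Both the number of up children and the number of bricks on which
+         * locks are held (locked_on[]) must equal child_count; if either
+         * check fails the heal is aborted with -EIO rather than risk
+         * assigning a gfid while some brick may hold a different one.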
*/ - up_count = AFR_COUNT (priv->child_up, priv->child_count); - if (up_count != priv->child_count) { - ret = -EIO; - goto out; - } + up_count = AFR_COUNT(priv->child_up, priv->child_count); + if (up_count != priv->child_count) { + ret = -EIO; + goto out; + } - locked_count = AFR_COUNT (locked_on, priv->child_count); - if (locked_count != priv->child_count) { - ret = -EIO; - goto out; - } + locked_count = AFR_COUNT(locked_on, priv->child_count); + if (locked_count != priv->child_count) { + ret = -EIO; + goto out; } + } - afr_lookup_and_heal_gfid (this, parent, bname, inode, replies, source, - sources, gfid); + afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source, + sources, gfid); out: - return ret; + return ret; } int -__afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this, - inode_t *parent, uuid_t pargfid, - const char *bname, inode_t *inode, - struct afr_reply *replies, int gfid_idx) +__afr_selfheal_name_impunge(call_frame_t *frame, xlator_t *this, + inode_t *parent, uuid_t pargfid, const char *bname, + inode_t *inode, struct afr_reply *replies, + int gfid_idx) { - int i = 0; - afr_private_t *priv = NULL; - int ret = 0; - unsigned char *sources = NULL; + int i = 0; + afr_private_t *priv = NULL; + int ret = 0; + unsigned char *sources = NULL; - priv = this->private; + priv = this->private; - sources = alloca0 (priv->child_count); + sources = alloca0(priv->child_count); - gf_uuid_copy (parent->gfid, pargfid); + gf_uuid_copy(parent->gfid, pargfid); - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret != 0) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; - if (gf_uuid_compare (replies[i].poststat.ia_gfid, - replies[gfid_idx].poststat.ia_gfid) == 0) { - sources[i] = 1; - continue; - } + if (gf_uuid_compare(replies[i].poststat.ia_gfid, + replies[gfid_idx].poststat.ia_gfid) == 0) { + sources[i] = 1; + continue; } + } - for (i = 0; i < priv->child_count; i++) { - if (sources[i]) - continue; + for (i = 0; i < priv->child_count; i++) { + if (sources[i]) + continue; - ret |= afr_selfheal_recreate_entry (frame, i, gfid_idx, sources, - parent, bname, inode, - replies); - } + ret |= afr_selfheal_recreate_entry(frame, i, gfid_idx, sources, parent, + bname, inode, replies); + } - return ret; + return ret; } - int -__afr_selfheal_name_expunge (xlator_t *this, inode_t *parent, uuid_t pargfid, - const char *bname, inode_t *inode, - struct afr_reply *replies) +__afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies) { - loc_t loc = {0, }; - int i = 0; - afr_private_t *priv = NULL; - char g[64]; - int ret = 0; - - priv = this->private; - - loc.parent = inode_ref (parent); - gf_uuid_copy (loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref (inode); - - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; - - if (replies[i].op_ret) - continue; - - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", - uuid_utoa (pargfid), bname, - uuid_utoa_r (replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir (priv->children[i], &loc, 1, NULL, - NULL); - break; - default: - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", - uuid_utoa (pargfid), bname, - uuid_utoa_r 
(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_unlink (priv->children[i], &loc, NULL, - NULL); - break; - } - } - - loc_wipe (&loc); - - return ret; + loc_t loc = { + 0, + }; + int i = 0; + afr_private_t *priv = NULL; + char g[64]; + int ret = 0; + + priv = this->private; + + loc.parent = inode_ref(parent); + gf_uuid_copy(loc.pargfid, pargfid); + loc.name = bname; + loc.inode = inode_ref(inode); + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + + if (replies[i].op_ret) + continue; + + switch (replies[i].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), + bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), + priv->children[i]->name); + + ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), + bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), + priv->children[i]->name); + + ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); + break; + } + } + loc_wipe(&loc); + + return ret; } /* This function is to be called after ensuring that there is no gfid mismatch * for the inode across multiple sources */ static int -afr_selfheal_gfid_idx_get (xlator_t *this, struct afr_reply *replies, - unsigned char *sources) +afr_selfheal_gfid_idx_get(xlator_t *this, struct afr_reply *replies, + unsigned char *sources) { - int i = 0; - int gfid_idx = -1; - afr_private_t *priv = NULL; + int i = 0; + int gfid_idx = -1; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret != 0) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; - if (!sources[i]) - continue; + if (!sources[i]) + continue; - if (gf_uuid_is_null (replies[i].poststat.ia_gfid)) - continue; + if (gf_uuid_is_null(replies[i].poststat.ia_gfid)) + continue; - gfid_idx = i; - break; - } - return gfid_idx; + gfid_idx = i; + break; + } + return gfid_idx; } static gf_boolean_t -afr_selfheal_name_need_heal_check (xlator_t *this, struct afr_reply *replies) +afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies) { - int i = 0; - int first_idx = -1; - gf_boolean_t need_heal = _gf_false; - afr_private_t *priv = NULL; - - priv = this->private; + int i = 0; + int first_idx = -1; + gf_boolean_t need_heal = _gf_false; + afr_private_t *priv = NULL; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; + priv = this->private; - if ((replies[i].op_ret == -1) && - (replies[i].op_errno == ENODATA)) - need_heal = _gf_true; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; - if (first_idx == -1) { - first_idx = i; - continue; - } + if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) + need_heal = _gf_true; - if (replies[i].op_ret != replies[first_idx].op_ret) - need_heal = _gf_true; + if (first_idx == -1) { + first_idx = i; + continue; + } - if (gf_uuid_compare (replies[i].poststat.ia_gfid, - replies[first_idx].poststat.ia_gfid)) - need_heal = _gf_true; + if (replies[i].op_ret != replies[first_idx].op_ret) + need_heal = _gf_true; - if ((replies[i].op_ret == 0) && - (gf_uuid_is_null(replies[i].poststat.ia_gfid))) - need_heal = _gf_true; + if 
(gf_uuid_compare(replies[i].poststat.ia_gfid, + replies[first_idx].poststat.ia_gfid)) + need_heal = _gf_true; - } + if ((replies[i].op_ret == 0) && + (gf_uuid_is_null(replies[i].poststat.ia_gfid))) + need_heal = _gf_true; + } - return need_heal; + return need_heal; } static int -afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies, - int source, unsigned char *sources, - uuid_t pargfid, const char *bname) +afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies, + int source, unsigned char *sources, + uuid_t pargfid, const char *bname) { - int i = 0; - int type_idx = -1; - ia_type_t inode_type = IA_INVAL; - ia_type_t inode_type1 = IA_INVAL; - afr_private_t *priv = NULL; + int i = 0; + int type_idx = -1; + ia_type_t inode_type = IA_INVAL; + ia_type_t inode_type1 = IA_INVAL; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret != 0) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; - if (replies[i].poststat.ia_type == IA_INVAL) - continue; + if (replies[i].poststat.ia_type == IA_INVAL) + continue; - if (inode_type == IA_INVAL) { - inode_type = replies[i].poststat.ia_type; - type_idx = i; - continue; - } - inode_type1 = replies[i].poststat.ia_type; - if (sources[i] || source == -1) { - if ((sources[type_idx] || source == -1) && - (inode_type != inode_type1)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SPLIT_BRAIN, - "Type mismatch for /%s: " - "%s on %s and %s on %s", - uuid_utoa(pargfid), bname, - gf_inode_type_to_str (inode_type1), - priv->children[i]->name, - gf_inode_type_to_str (inode_type), - priv->children[type_idx]->name); - gf_event (EVENT_AFR_SPLIT_BRAIN, - "subvol=%s;type=file;" - "file=/%s;count=2;" - "child-%d=%s;type-%d=%s;child-%d=%s;" - "type-%d=%s", this->name, - uuid_utoa (pargfid), bname, i, - priv->children[i]->name, i, - gf_inode_type_to_str (inode_type1), - type_idx, - priv->children[type_idx]->name, - type_idx, - gf_inode_type_to_str (inode_type)); - return -EIO; - } - inode_type = replies[i].poststat.ia_type; - type_idx = i; - } + if (inode_type == IA_INVAL) { + inode_type = replies[i].poststat.ia_type; + type_idx = i; + continue; } - return 0; + inode_type1 = replies[i].poststat.ia_type; + if (sources[i] || source == -1) { + if ((sources[type_idx] || source == -1) && + (inode_type != inode_type1)) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN, + "Type mismatch for /%s: " + "%s on %s and %s on %s", + uuid_utoa(pargfid), bname, + gf_inode_type_to_str(inode_type1), + priv->children[i]->name, + gf_inode_type_to_str(inode_type), + priv->children[type_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;type=file;" + "file=/%s;count=2;" + "child-%d=%s;type-%d=%s;child-%d=%s;" + "type-%d=%s", + this->name, uuid_utoa(pargfid), bname, i, + priv->children[i]->name, i, + gf_inode_type_to_str(inode_type1), type_idx, + priv->children[type_idx]->name, type_idx, + gf_inode_type_to_str(inode_type)); + return -EIO; + } + inode_type = replies[i].poststat.ia_type; + type_idx = i; + } + } + return 0; } static int -afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies, - int source, unsigned char *sources, - int *gfid_idx, uuid_t pargfid, - const char *bname, inode_t *inode, - unsigned char *locked_on, dict_t *xdata) +afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, + 
int source, unsigned char *sources, + int *gfid_idx, uuid_t pargfid, + const char *bname, inode_t *inode, + unsigned char *locked_on, dict_t *xdata) { - int i = 0; - int gfid_idx_iter = -1; - int ret = -1; - void *gfid = NULL; - void *gfid1 = NULL; - afr_private_t *priv = NULL; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid || replies[i].op_ret != 0) - continue; - - if (gf_uuid_is_null (replies[i].poststat.ia_gfid)) - continue; - - if (!gfid) { - gfid = &replies[i].poststat.ia_gfid; - gfid_idx_iter = i; - continue; - } - - gfid1 = &replies[i].poststat.ia_gfid; - if (sources[i] || source == -1) { - if ((sources[gfid_idx_iter] || source == -1) && - gf_uuid_compare (gfid, gfid1)) { - ret = afr_gfid_split_brain_source (this, - replies, - inode, - pargfid, - bname, - gfid_idx_iter, - i, locked_on, - gfid_idx, - xdata); - if (!ret && *gfid_idx >= 0) { - ret = dict_set_str (xdata, - "gfid-heal-msg", - "GFID split-brain " - "resolved"); - if (ret) - gf_msg (this->name, - GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, - "Error setting gfid-" - "heal-msg dict"); - } - return ret; - } - gfid = &replies[i].poststat.ia_gfid; - gfid_idx_iter = i; - } - } - - *gfid_idx = gfid_idx_iter; - return 0; + int i = 0; + int gfid_idx_iter = -1; + int ret = -1; + void *gfid = NULL; + void *gfid1 = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; + + if (gf_uuid_is_null(replies[i].poststat.ia_gfid)) + continue; + + if (!gfid) { + gfid = &replies[i].poststat.ia_gfid; + gfid_idx_iter = i; + continue; + } + + gfid1 = &replies[i].poststat.ia_gfid; + if (sources[i] || source == -1) { + if ((sources[gfid_idx_iter] || source == -1) && + gf_uuid_compare(gfid, gfid1)) { + ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, + bname, gfid_idx_iter, i, + locked_on, gfid_idx, xdata); + if (!ret && *gfid_idx >= 0) { + ret = dict_set_str(xdata, "gfid-heal-msg", + "GFID split-brain " + "resolved"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error setting gfid-" + "heal-msg dict"); + } + return ret; + } + gfid = &replies[i].poststat.ia_gfid; + gfid_idx_iter = i; + } + } + + *gfid_idx = gfid_idx_iter; + return 0; } static gf_boolean_t -afr_selfheal_name_source_empty_check (xlator_t *this, struct afr_reply *replies, - unsigned char *sources, int source) +afr_selfheal_name_source_empty_check(xlator_t *this, struct afr_reply *replies, + unsigned char *sources, int source) { - int i = 0; - afr_private_t *priv = NULL; - gf_boolean_t source_is_empty = _gf_true; + int i = 0; + afr_private_t *priv = NULL; + gf_boolean_t source_is_empty = _gf_true; - priv = this->private; + priv = this->private; - if (source == -1) { - source_is_empty = _gf_false; - goto out; - } + if (source == -1) { + source_is_empty = _gf_false; + goto out; + } - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; - if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT) - continue; + if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT) + continue; - source_is_empty = _gf_false; - break; - } + source_is_empty = _gf_false; + break; + } out: - return source_is_empty; + return source_is_empty; } int -__afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, inode_t *inode, - unsigned char 
*sources, unsigned char *sinks, - unsigned char *healed_sinks, int source, - unsigned char *locked_on, struct afr_reply *replies, - void *gfid_req, dict_t *xdata) +__afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + uuid_t pargfid, const char *bname, inode_t *inode, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, int source, + unsigned char *locked_on, struct afr_reply *replies, + void *gfid_req, dict_t *xdata) { - int gfid_idx = -1; - int ret = -1; - void *gfid = NULL; - gf_boolean_t source_is_empty = _gf_true; - gf_boolean_t need_heal = _gf_false; - gf_boolean_t is_gfid_absent = _gf_false; - - need_heal = afr_selfheal_name_need_heal_check (this, replies); - if (!need_heal) - return 0; - - source_is_empty = afr_selfheal_name_source_empty_check (this, replies, - sources, - source); - if (source_is_empty) { - ret = __afr_selfheal_name_expunge (this, parent, pargfid, - bname, inode, replies); - if (ret == -EIO) - ret = -1; - return ret; - } + int gfid_idx = -1; + int ret = -1; + void *gfid = NULL; + gf_boolean_t source_is_empty = _gf_true; + gf_boolean_t need_heal = _gf_false; + gf_boolean_t is_gfid_absent = _gf_false; + + need_heal = afr_selfheal_name_need_heal_check(this, replies); + if (!need_heal) + return 0; - ret = afr_selfheal_name_type_mismatch_check (this, replies, source, - sources, pargfid, bname); - if (ret) - return ret; + source_is_empty = afr_selfheal_name_source_empty_check(this, replies, + sources, source); + if (source_is_empty) { + ret = __afr_selfheal_name_expunge(this, parent, pargfid, bname, inode, + replies); + if (ret == -EIO) + ret = -1; + return ret; + } - ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source, - sources, &gfid_idx, - pargfid, bname, inode, - locked_on, xdata); - if (ret) - return ret; + ret = afr_selfheal_name_type_mismatch_check(this, replies, source, sources, + pargfid, bname); + if (ret) + return ret; - if (gfid_idx == -1) { - if (!gfid_req || gf_uuid_is_null (gfid_req)) - return -1; - gfid = gfid_req; - } else { - gfid = &replies[gfid_idx].poststat.ia_gfid; - } + ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources, + &gfid_idx, pargfid, bname, + inode, locked_on, xdata); + if (ret) + return ret; - is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false; - ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode, - replies, gfid, locked_on, source, - sources, is_gfid_absent); - if (ret) - return ret; + if (gfid_idx == -1) { + if (!gfid_req || gf_uuid_is_null(gfid_req)) + return -1; + gfid = gfid_req; + } else { + gfid = &replies[gfid_idx].poststat.ia_gfid; + } + + is_gfid_absent = (gfid_idx == -1) ? 
_gf_true : _gf_false; + ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode, + replies, gfid, locked_on, source, sources, + is_gfid_absent); + if (ret) + return ret; - if (gfid_idx == -1) { - gfid_idx = afr_selfheal_gfid_idx_get (this, replies, sources); - if (gfid_idx == -1) - return -1; - } + if (gfid_idx == -1) { + gfid_idx = afr_selfheal_gfid_idx_get(this, replies, sources); + if (gfid_idx == -1) + return -1; + } - ret = __afr_selfheal_name_impunge (frame, this, parent, pargfid, - bname, inode, replies, gfid_idx); - if (ret == -EIO) - ret = -1; + ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname, + inode, replies, gfid_idx); + if (ret == -EIO) + ret = -1; - return ret; + return ret; } - int -__afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources, - unsigned char *healed_sinks, - unsigned char *locked_on, - uint64_t *witness) +__afr_selfheal_name_finalize_source(xlator_t *this, unsigned char *sources, + unsigned char *healed_sinks, + unsigned char *locked_on, uint64_t *witness) { - int i = 0; - afr_private_t *priv = NULL; - int source = -1; - int sources_count = 0; - - priv = this->private; - - sources_count = AFR_COUNT (sources, priv->child_count); - - if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) - || !sources_count || afr_does_witness_exist (this, witness)) { - memset (sources, 0, sizeof (*sources) * priv->child_count); - afr_mark_active_sinks (this, sources, locked_on, healed_sinks); - return -1; - } - - for (i = 0; i < priv->child_count; i++) { - if (sources[i]) { - source = i; - break; - } - } - - return source; + int i = 0; + afr_private_t *priv = NULL; + int source = -1; + int sources_count = 0; + + priv = this->private; + + sources_count = AFR_COUNT(sources, priv->child_count); + + if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) || + !sources_count || afr_does_witness_exist(this, witness)) { + memset(sources, 0, sizeof(*sources) * priv->child_count); + afr_mark_active_sinks(this, sources, locked_on, healed_sinks); + return -1; + } + + for (i = 0; i < priv->child_count; i++) { + if (sources[i]) { + source = i; + break; + } + } + + return source; } int -__afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, unsigned char *locked_on, - unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, int *source_p) +__afr_selfheal_name_prepare(call_frame_t *frame, xlator_t *this, + inode_t *parent, uuid_t pargfid, + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, + int *source_p) { - int ret = -1; - int source = -1; - afr_private_t *priv = NULL; - struct afr_reply *replies = NULL; - uint64_t *witness = NULL; - - priv = this->private; - - replies = alloca0 (priv->child_count * sizeof(*replies)); - - ret = afr_selfheal_unlocked_discover (frame, parent, pargfid, replies); - if (ret) - goto out; - - witness = alloca0 (sizeof (*witness) * priv->child_count); - ret = afr_selfheal_find_direction (frame, this, replies, - AFR_ENTRY_TRANSACTION, - locked_on, sources, sinks, witness, - NULL); - if (ret) - goto out; - - /* Initialize the healed_sinks[] array optimistically to - the intersection of to-be-healed (i.e sinks[]) and - the list of servers which are up (i.e locked_on[]). - - As we encounter failures in the healing process, we - will unmark the respective servers in the healed_sinks[] - array. 
- */ - AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); - - source = __afr_selfheal_name_finalize_source (this, sources, - healed_sinks, - locked_on, witness); - if (source < 0) { - /* If source is < 0 (typically split-brain), we perform a - conservative merge of entries rather than erroring out */ - } - *source_p = source; + int ret = -1; + int source = -1; + afr_private_t *priv = NULL; + struct afr_reply *replies = NULL; + uint64_t *witness = NULL; + + priv = this->private; + + replies = alloca0(priv->child_count * sizeof(*replies)); + + ret = afr_selfheal_unlocked_discover(frame, parent, pargfid, replies); + if (ret) + goto out; + + witness = alloca0(sizeof(*witness) * priv->child_count); + ret = afr_selfheal_find_direction(frame, this, replies, + AFR_ENTRY_TRANSACTION, locked_on, sources, + sinks, witness, NULL); + if (ret) + goto out; + + /* Initialize the healed_sinks[] array optimistically to + the intersection of to-be-healed (i.e sinks[]) and + the list of servers which are up (i.e locked_on[]). + + As we encounter failures in the healing process, we + will unmark the respective servers in the healed_sinks[] + array. + */ + AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count); + + source = __afr_selfheal_name_finalize_source(this, sources, healed_sinks, + locked_on, witness); + if (source < 0) { + /* If source is < 0 (typically split-brain), we perform a + conservative merge of entries rather than erroring out */ + } + *source_p = source; out: - if (replies) - afr_replies_wipe (replies, priv->child_count); + if (replies) + afr_replies_wipe(replies, priv->child_count); - return ret; + return ret; } - int -afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, void *gfid_req, - dict_t *xdata) +afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + uuid_t pargfid, const char *bname, void *gfid_req, + dict_t *xdata) { - afr_private_t *priv = NULL; - unsigned char *sources = NULL; - unsigned char *sinks = NULL; - unsigned char *healed_sinks = NULL; - unsigned char *locked_on = NULL; - int source = -1; - struct afr_reply *replies = NULL; - int ret = -1; - inode_t *inode = NULL; - dict_t *xattr = NULL; - - xattr = dict_new (); - if (!xattr) - return -ENOMEM; - - ret = dict_set_int32 (xattr, GF_GFIDLESS_LOOKUP, 1); - if (ret) { - dict_unref (xattr); - return -1; + afr_private_t *priv = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *locked_on = NULL; + int source = -1; + struct afr_reply *replies = NULL; + int ret = -1; + inode_t *inode = NULL; + dict_t *xattr = NULL; + + xattr = dict_new(); + if (!xattr) + return -ENOMEM; + + ret = dict_set_int32(xattr, GF_GFIDLESS_LOOKUP, 1); + if (ret) { + dict_unref(xattr); + return -1; + } + + priv = this->private; + + locked_on = alloca0(priv->child_count); + sources = alloca0(priv->child_count); + sinks = alloca0(priv->child_count); + healed_sinks = alloca0(priv->child_count); + + replies = alloca0(priv->child_count * sizeof(*replies)); + + ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname, + locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) { + ret = -ENOTCONN; + goto unlock; + } + + ret = __afr_selfheal_name_prepare(frame, this, parent, pargfid, + locked_on, sources, sinks, + healed_sinks, &source); + if (ret) + goto unlock; + + inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies, + locked_on, xattr); + if (!inode) { + ret = 
-ENOMEM; + goto unlock; } - priv = this->private; - - locked_on = alloca0 (priv->child_count); - sources = alloca0 (priv->child_count); - sinks = alloca0 (priv->child_count); - healed_sinks = alloca0 (priv->child_count); - - replies = alloca0 (priv->child_count * sizeof(*replies)); - - ret = afr_selfheal_entrylk (frame, this, parent, this->name, bname, - locked_on); - { - if (ret < AFR_SH_MIN_PARTICIPANTS) { - ret = -ENOTCONN; - goto unlock; - } - - ret = __afr_selfheal_name_prepare (frame, this, parent, pargfid, - locked_on, sources, sinks, - healed_sinks, &source); - if (ret) - goto unlock; - - inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname, - replies, locked_on, - xattr); - if (!inode) { - ret = -ENOMEM; - goto unlock; - } - - ret = __afr_selfheal_name_do (frame, this, parent, pargfid, - bname, inode, sources, sinks, - healed_sinks, source, locked_on, - replies, gfid_req, xdata); - } + ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode, + sources, sinks, healed_sinks, source, + locked_on, replies, gfid_req, xdata); + } unlock: - afr_selfheal_unentrylk (frame, this, parent, this->name, bname, - locked_on, NULL); - if (inode) - inode_unref (inode); + afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on, + NULL); + if (inode) + inode_unref(inode); - if (replies) - afr_replies_wipe (replies, priv->child_count); - if (xattr) - dict_unref (xattr); + if (replies) + afr_replies_wipe(replies, priv->child_count); + if (xattr) + dict_unref(xattr); - return ret; + return ret; } - int -afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this, - inode_t *parent, uuid_t pargfid, - const char *bname, gf_boolean_t *need_heal) +afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, + inode_t *parent, uuid_t pargfid, + const char *bname, gf_boolean_t *need_heal) { - afr_private_t *priv = NULL; - int i = 0; - struct afr_reply *replies = NULL; - inode_t *inode = NULL; - int first_idx = -1; + afr_private_t *priv = NULL; + int i = 0; + struct afr_reply *replies = NULL; + inode_t *inode = NULL; + int first_idx = -1; - priv = this->private; + priv = this->private; - replies = alloca0 (sizeof (*replies) * priv->child_count); + replies = alloca0(sizeof(*replies) * priv->child_count); - inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname, - replies, priv->child_up, NULL); - if (!inode) - return -ENOMEM; + inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies, + priv->child_up, NULL); + if (!inode) + return -ENOMEM; - for (i = 0; i < priv->child_count; i++) { - if (!replies[i].valid) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; - if ((replies[i].op_ret == -1) && - (replies[i].op_errno == ENODATA)) { - *need_heal = _gf_true; - break; - } + if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) { + *need_heal = _gf_true; + break; + } - if (first_idx == -1) { - first_idx = i; - continue; - } + if (first_idx == -1) { + first_idx = i; + continue; + } - if (replies[i].op_ret != replies[first_idx].op_ret) { - *need_heal = _gf_true; - break; - } + if (replies[i].op_ret != replies[first_idx].op_ret) { + *need_heal = _gf_true; + break; + } - if (gf_uuid_compare (replies[i].poststat.ia_gfid, - replies[first_idx].poststat.ia_gfid)) { - *need_heal = _gf_true; - break; - } - } + if (gf_uuid_compare(replies[i].poststat.ia_gfid, + replies[first_idx].poststat.ia_gfid)) { + *need_heal = _gf_true; + break; + } + } - if (inode) - inode_unref (inode); - if 
(replies) - afr_replies_wipe (replies, priv->child_count); - return 0; + if (inode) + inode_unref(inode); + if (replies) + afr_replies_wipe(replies, priv->child_count); + return 0; } int -afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname, - void *gfid_req, dict_t *xdata) +afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, + void *gfid_req, dict_t *xdata) { - inode_t *parent = NULL; - call_frame_t *frame = NULL; - int ret = -1; - gf_boolean_t need_heal = _gf_false; - - parent = afr_inode_find (this, pargfid); - if (!parent) - goto out; - - frame = afr_frame_create (this, NULL); - if (!frame) - goto out; - - ret = afr_selfheal_name_unlocked_inspect (frame, this, parent, pargfid, - bname, &need_heal); - if (ret) - goto out; - - if (need_heal) { - ret = afr_selfheal_name_do (frame, this, parent, pargfid, bname, - gfid_req, xdata); - if (ret) - goto out; - } + inode_t *parent = NULL; + call_frame_t *frame = NULL; + int ret = -1; + gf_boolean_t need_heal = _gf_false; + + parent = afr_inode_find(this, pargfid); + if (!parent) + goto out; + + frame = afr_frame_create(this, NULL); + if (!frame) + goto out; + + ret = afr_selfheal_name_unlocked_inspect(frame, this, parent, pargfid, + bname, &need_heal); + if (ret) + goto out; + + if (need_heal) { + ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname, + gfid_req, xdata); + if (ret) + goto out; + } - ret = 0; + ret = 0; out: - if (parent) - inode_unref (parent); - if (frame) - AFR_STACK_DESTROY (frame); + if (parent) + inode_unref(parent); + if (frame) + AFR_STACK_DESTROY(frame); - return ret; + return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 1f7ae7bb43d..0cf01a041b4 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. 
*/ - #include "afr.h" #include "afr-self-heal.h" #include "afr-self-heald.h" @@ -17,1378 +16,1334 @@ #include "afr-messages.h" #include "byte-order.h" -#define SHD_INODE_LRU_LIMIT 2048 -#define AFR_EH_SPLIT_BRAIN_LIMIT 1024 -#define AFR_STATISTICS_HISTORY_SIZE 50 - - -#define ASSERT_LOCAL(this, healer) \ - if (!afr_shd_is_subvol_local(this, healer->subvol)) { \ - healer->local = _gf_false; \ - if (safe_break (healer)) { \ - break; \ - } else { \ - continue; \ - } \ - } else { \ - healer->local = _gf_true; \ - } - - -#define NTH_INDEX_HEALER(this, n) &((((afr_private_t *)this->private))->shd.index_healers[n]) -#define NTH_FULL_HEALER(this, n) &((((afr_private_t *)this->private))->shd.full_healers[n]) +#define SHD_INODE_LRU_LIMIT 2048 +#define AFR_EH_SPLIT_BRAIN_LIMIT 1024 +#define AFR_STATISTICS_HISTORY_SIZE 50 + +#define ASSERT_LOCAL(this, healer) \ + if (!afr_shd_is_subvol_local(this, healer->subvol)) { \ + healer->local = _gf_false; \ + if (safe_break(healer)) { \ + break; \ + } else { \ + continue; \ + } \ + } else { \ + healer->local = _gf_true; \ + } + +#define NTH_INDEX_HEALER(this, n) \ + &((((afr_private_t *)this->private))->shd.index_healers[n]) +#define NTH_FULL_HEALER(this, n) \ + &((((afr_private_t *)this->private))->shd.full_healers[n]) char * -afr_subvol_name (xlator_t *this, int subvol) +afr_subvol_name(xlator_t *this, int subvol) { - afr_private_t *priv = NULL; + afr_private_t *priv = NULL; - priv = this->private; - if (subvol < 0 || subvol > priv->child_count) - return NULL; + priv = this->private; + if (subvol < 0 || subvol > priv->child_count) + return NULL; - return priv->children[subvol]->name; + return priv->children[subvol]->name; } - void -afr_destroy_crawl_event_data (void *data) +afr_destroy_crawl_event_data(void *data) { - return; + return; } - void -afr_destroy_shd_event_data (void *data) +afr_destroy_shd_event_data(void *data) { - shd_event_t *shd_event = data; - - if (!shd_event) - return; - GF_FREE (shd_event->path); + shd_event_t *shd_event = data; + if (!shd_event) return; -} + GF_FREE(shd_event->path); + return; +} gf_boolean_t -afr_shd_is_subvol_local (xlator_t *this, int subvol) +afr_shd_is_subvol_local(xlator_t *this, int subvol) { - afr_private_t *priv = NULL; - gf_boolean_t is_local = _gf_false; - loc_t loc = {0, }; - - loc.inode = this->itable->root; - gf_uuid_copy (loc.gfid, loc.inode->gfid); - priv = this->private; - syncop_is_subvol_local(priv->children[subvol], &loc, &is_local); - return is_local; + afr_private_t *priv = NULL; + gf_boolean_t is_local = _gf_false; + loc_t loc = { + 0, + }; + + loc.inode = this->itable->root; + gf_uuid_copy(loc.gfid, loc.inode->gfid); + priv = this->private; + syncop_is_subvol_local(priv->children[subvol], &loc, &is_local); + return is_local; } - int -__afr_shd_healer_wait (struct subvol_healer *healer) +__afr_shd_healer_wait(struct subvol_healer *healer) { - afr_private_t *priv = NULL; - struct timespec wait_till = {0, }; - int ret = 0; + afr_private_t *priv = NULL; + struct timespec wait_till = { + 0, + }; + int ret = 0; - priv = healer->this->private; + priv = healer->this->private; disabled_loop: - wait_till.tv_sec = time (NULL) + priv->shd.timeout; + wait_till.tv_sec = time(NULL) + priv->shd.timeout; - while (!healer->rerun) { - ret = pthread_cond_timedwait (&healer->cond, - &healer->mutex, - &wait_till); - if (ret == ETIMEDOUT) - break; - } + while (!healer->rerun) { + ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); + if (ret == ETIMEDOUT) + break; + } - ret = healer->rerun; - 
healer->rerun = 0; + ret = healer->rerun; + healer->rerun = 0; - if (!priv->shd.enabled) - goto disabled_loop; + if (!priv->shd.enabled) + goto disabled_loop; - return ret; + return ret; } - int -afr_shd_healer_wait (struct subvol_healer *healer) +afr_shd_healer_wait(struct subvol_healer *healer) { - int ret = 0; + int ret = 0; - pthread_mutex_lock (&healer->mutex); - { - ret = __afr_shd_healer_wait (healer); - } - pthread_mutex_unlock (&healer->mutex); + pthread_mutex_lock(&healer->mutex); + { + ret = __afr_shd_healer_wait(healer); + } + pthread_mutex_unlock(&healer->mutex); - return ret; + return ret; } - gf_boolean_t -safe_break (struct subvol_healer *healer) +safe_break(struct subvol_healer *healer) { - gf_boolean_t ret = _gf_false; + gf_boolean_t ret = _gf_false; - pthread_mutex_lock (&healer->mutex); - { - if (healer->rerun) - goto unlock; + pthread_mutex_lock(&healer->mutex); + { + if (healer->rerun) + goto unlock; - healer->running = _gf_false; - ret = _gf_true; - } + healer->running = _gf_false; + ret = _gf_true; + } unlock: - pthread_mutex_unlock (&healer->mutex); + pthread_mutex_unlock(&healer->mutex); - return ret; + return ret; } - inode_t * -afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid) +afr_shd_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid) { - int ret = 0; - uint64_t val = IA_INVAL; - dict_t *xdata = NULL; - dict_t *rsp_dict = NULL; - inode_t *inode = NULL; - - xdata = dict_new (); - if (!xdata) - goto out; - - ret = dict_set_int8 (xdata, GF_INDEX_IA_TYPE_GET_REQ, 1); + int ret = 0; + uint64_t val = IA_INVAL; + dict_t *xdata = NULL; + dict_t *rsp_dict = NULL; + inode_t *inode = NULL; + + xdata = dict_new(); + if (!xdata) + goto out; + + ret = dict_set_int8(xdata, GF_INDEX_IA_TYPE_GET_REQ, 1); + if (ret) + goto out; + + ret = syncop_inode_find(this, subvol, gfid, &inode, xdata, &rsp_dict); + if (ret < 0) + goto out; + + if (rsp_dict) { + ret = dict_get_uint64(rsp_dict, GF_INDEX_IA_TYPE_GET_RSP, &val); if (ret) - goto out; - - ret = syncop_inode_find (this, subvol, gfid, &inode, - xdata, &rsp_dict); - if (ret < 0) - goto out; - - if (rsp_dict) { - ret = dict_get_uint64 (rsp_dict, GF_INDEX_IA_TYPE_GET_RSP, - &val); - if (ret) - goto out; - } - ret = inode_ctx_set2 (inode, subvol, 0, &val); + goto out; + } + ret = inode_ctx_set2(inode, subvol, 0, &val); out: - if (ret && inode) { - inode_unref (inode); - inode = NULL; - } - if (xdata) - dict_unref (xdata); - if (rsp_dict) - dict_unref (rsp_dict); - return inode; + if (ret && inode) { + inode_unref(inode); + inode = NULL; + } + if (xdata) + dict_unref(xdata); + if (rsp_dict) + dict_unref(rsp_dict); + return inode; } -inode_t* -afr_shd_index_inode (xlator_t *this, xlator_t *subvol, char *vgfid) +inode_t * +afr_shd_index_inode(xlator_t *this, xlator_t *subvol, char *vgfid) { - loc_t rootloc = {0, }; - inode_t *inode = NULL; - int ret = 0; - dict_t *xattr = NULL; - void *index_gfid = NULL; + loc_t rootloc = { + 0, + }; + inode_t *inode = NULL; + int ret = 0; + dict_t *xattr = NULL; + void *index_gfid = NULL; - rootloc.inode = inode_ref (this->itable->root); - gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid); + rootloc.inode = inode_ref(this->itable->root); + gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid); - ret = syncop_getxattr (subvol, &rootloc, &xattr, - vgfid, NULL, NULL); - if (ret || !xattr) { - errno = -ret; - goto out; - } + ret = syncop_getxattr(subvol, &rootloc, &xattr, vgfid, NULL, NULL); + if (ret || !xattr) { + errno = -ret; + goto out; + } - ret = dict_get_ptr (xattr, vgfid, 
&index_gfid); - if (ret) - goto out; + ret = dict_get_ptr(xattr, vgfid, &index_gfid); + if (ret) + goto out; - gf_msg_debug (this->name, 0, "%s dir gfid for %s: %s", - vgfid, subvol->name, uuid_utoa (index_gfid)); + gf_msg_debug(this->name, 0, "%s dir gfid for %s: %s", vgfid, subvol->name, + uuid_utoa(index_gfid)); - inode = afr_shd_inode_find (this, subvol, index_gfid); + inode = afr_shd_inode_find(this, subvol, index_gfid); out: - loc_wipe (&rootloc); + loc_wipe(&rootloc); - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); - return inode; + return inode; } int -afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name, - ia_type_t type) +afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, + ia_type_t type) { - int ret = 0; - loc_t loc = {0,}; + int ret = 0; + loc_t loc = { + 0, + }; - loc.parent = inode_ref (inode); - loc.name = name; + loc.parent = inode_ref(inode); + loc.name = name; - if (IA_ISDIR (type)) - ret = syncop_rmdir (subvol, &loc, 1, NULL, NULL); - else - ret = syncop_unlink (subvol, &loc, NULL, NULL); + if (IA_ISDIR(type)) + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + else + ret = syncop_unlink(subvol, &loc, NULL, NULL); - loc_wipe (&loc); - return ret; + loc_wipe(&loc); + return ret; } void -afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid) +afr_shd_zero_xattrop(xlator_t *this, uuid_t gfid) { - - call_frame_t *frame = NULL; - inode_t *inode = NULL; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - int ret = 0; - int i = 0; - int raw[AFR_NUM_CHANGE_LOGS] = {0}; - - priv = this->private; - frame = afr_frame_create (this, NULL); - if (!frame) - goto out; - inode = afr_inode_find (this, gfid); - if (!inode) - goto out; - xattr = dict_new(); - if (!xattr) - goto out; - ret = dict_set_static_bin (xattr, AFR_DIRTY, raw, - sizeof(int) * AFR_NUM_CHANGE_LOGS); + call_frame_t *frame = NULL; + inode_t *inode = NULL; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + int ret = 0; + int i = 0; + int raw[AFR_NUM_CHANGE_LOGS] = {0}; + + priv = this->private; + frame = afr_frame_create(this, NULL); + if (!frame) + goto out; + inode = afr_inode_find(this, gfid); + if (!inode) + goto out; + xattr = dict_new(); + if (!xattr) + goto out; + ret = dict_set_static_bin(xattr, AFR_DIRTY, raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) + goto out; + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_static_bin(xattr, priv->pending_key[i], raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); if (ret) - goto out; - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_static_bin (xattr, priv->pending_key[i], raw, - sizeof(int) * AFR_NUM_CHANGE_LOGS); - if (ret) - goto out; - } + goto out; + } - /*Send xattrop to all bricks. Doing a lookup to see if bricks are up or - * has valid repies for this gfid seems a bit of an overkill.*/ - for (i = 0; i < priv->child_count; i++) - afr_selfheal_post_op (frame, this, inode, i, xattr, NULL); + /*Send xattrop to all bricks. 
Doing a lookup to see if bricks are up or + * has valid repies for this gfid seems a bit of an overkill.*/ + for (i = 0; i < priv->child_count; i++) + afr_selfheal_post_op(frame, this, inode, i, xattr, NULL); out: - if (frame) - AFR_STACK_DESTROY (frame); - if (inode) - inode_unref (inode); - if (xattr) - dict_unref (xattr); - return; + if (frame) + AFR_STACK_DESTROY(frame); + if (inode) + inode_unref(inode); + if (xattr) + dict_unref(xattr); + return; } int -afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent, - const char *bname) +afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent, + const char *bname) { - int ret = -1; + int ret = -1; - ret = afr_selfheal_name (THIS, parent, bname, NULL, NULL); + ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL); - return ret; + return ret; } int -afr_shd_selfheal (struct subvol_healer *healer, int child, uuid_t gfid) +afr_shd_selfheal(struct subvol_healer *healer, int child, uuid_t gfid) { - int ret = 0; - eh_t *eh = NULL; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - shd_event_t *shd_event = NULL; - char *path = NULL; - xlator_t *subvol = NULL; - xlator_t *this = NULL; - crawl_event_t *crawl_event = NULL; - - this = healer->this; - priv = this->private; - shd = &priv->shd; - crawl_event = &healer->crawl_event; - - subvol = priv->children[child]; - - //If this fails with ENOENT/ESTALE index is stale - ret = syncop_gfid_to_path (this->itable, subvol, gfid, &path); - if (ret < 0) - return ret; - - ret = afr_selfheal (this, gfid); - - LOCK (&priv->lock); - { - if (ret == -EIO) { - eh = shd->split_brain; - crawl_event->split_brain_count++; - } else if (ret < 0) { - crawl_event->heal_failed_count++; - } else if (ret == 0) { - crawl_event->healed_count++; - } + int ret = 0; + eh_t *eh = NULL; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + shd_event_t *shd_event = NULL; + char *path = NULL; + xlator_t *subvol = NULL; + xlator_t *this = NULL; + crawl_event_t *crawl_event = NULL; + + this = healer->this; + priv = this->private; + shd = &priv->shd; + crawl_event = &healer->crawl_event; + + subvol = priv->children[child]; + + // If this fails with ENOENT/ESTALE index is stale + ret = syncop_gfid_to_path(this->itable, subvol, gfid, &path); + if (ret < 0) + return ret; + + ret = afr_selfheal(this, gfid); + + LOCK(&priv->lock); + { + if (ret == -EIO) { + eh = shd->split_brain; + crawl_event->split_brain_count++; + } else if (ret < 0) { + crawl_event->heal_failed_count++; + } else if (ret == 0) { + crawl_event->healed_count++; } - UNLOCK (&priv->lock); + } + UNLOCK(&priv->lock); - if (eh) { - shd_event = GF_CALLOC (1, sizeof(*shd_event), - gf_afr_mt_shd_event_t); - if (!shd_event) - goto out; + if (eh) { + shd_event = GF_CALLOC(1, sizeof(*shd_event), gf_afr_mt_shd_event_t); + if (!shd_event) + goto out; - shd_event->child = child; - shd_event->path = path; + shd_event->child = child; + shd_event->path = path; - if (eh_save_history (eh, shd_event) < 0) - goto out; + if (eh_save_history(eh, shd_event) < 0) + goto out; - shd_event = NULL; - path = NULL; - } + shd_event = NULL; + path = NULL; + } out: - GF_FREE (shd_event); - GF_FREE (path); - return ret; + GF_FREE(shd_event); + GF_FREE(path); + return ret; } - void -afr_shd_sweep_prepare (struct subvol_healer *healer) +afr_shd_sweep_prepare(struct subvol_healer *healer) { - crawl_event_t *event = NULL; + crawl_event_t *event = NULL; - event = &healer->crawl_event; + event = &healer->crawl_event; - event->healed_count = 0; - 
event->split_brain_count = 0; - event->heal_failed_count = 0; + event->healed_count = 0; + event->split_brain_count = 0; + event->heal_failed_count = 0; - time (&event->start_time); - event->end_time = 0; + time(&event->start_time); + event->end_time = 0; } - void -afr_shd_sweep_done (struct subvol_healer *healer) +afr_shd_sweep_done(struct subvol_healer *healer) { - crawl_event_t *event = NULL; - crawl_event_t *history = NULL; - afr_self_heald_t *shd = NULL; + crawl_event_t *event = NULL; + crawl_event_t *history = NULL; + afr_self_heald_t *shd = NULL; - event = &healer->crawl_event; - shd = &(((afr_private_t *)healer->this->private)->shd); + event = &healer->crawl_event; + shd = &(((afr_private_t *)healer->this->private)->shd); - time (&event->end_time); - history = memdup (event, sizeof (*event)); - event->start_time = 0; + time(&event->end_time); + history = memdup(event, sizeof(*event)); + event->start_time = 0; - if (!history) - return; + if (!history) + return; - if (eh_save_history (shd->statistics[healer->subvol], history) < 0) - GF_FREE (history); + if (eh_save_history(shd->statistics[healer->subvol], history) < 0) + GF_FREE(history); } int -afr_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, - void *data) +afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) { - struct subvol_healer *healer = data; - afr_private_t *priv = NULL; - uuid_t gfid = {0}; - int ret = 0; - uint64_t val = IA_INVAL; + struct subvol_healer *healer = data; + afr_private_t *priv = NULL; + uuid_t gfid = {0}; + int ret = 0; + uint64_t val = IA_INVAL; - priv = healer->this->private; - if (!priv->shd.enabled) - return -EBUSY; + priv = healer->this->private; + if (!priv->shd.enabled) + return -EBUSY; - gf_msg_debug (healer->this->name, 0, "got entry: %s from %s", - entry->d_name, priv->children[healer->subvol]->name); + gf_msg_debug(healer->this->name, 0, "got entry: %s from %s", entry->d_name, + priv->children[healer->subvol]->name); - ret = gf_uuid_parse (entry->d_name, gfid); - if (ret) - return 0; + ret = gf_uuid_parse(entry->d_name, gfid); + if (ret) + return 0; - inode_ctx_get2 (parent->inode, subvol, NULL, &val); + inode_ctx_get2(parent->inode, subvol, NULL, &val); - ret = afr_shd_selfheal (healer, healer->subvol, gfid); + ret = afr_shd_selfheal(healer, healer->subvol, gfid); - if (ret == -ENOENT || ret == -ESTALE) - afr_shd_index_purge (subvol, parent->inode, entry->d_name, val); + if (ret == -ENOENT || ret == -ESTALE) + afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); - if (ret == 2) - /* If bricks crashed in pre-op after creating indices/xattrop - * link but before setting afr changelogs, we end up with stale - * xattrop links but zero changelogs. Remove such entries by - * sending a post-op with zero changelogs. - */ - afr_shd_zero_xattrop (healer->this, gfid); + if (ret == 2) + /* If bricks crashed in pre-op after creating indices/xattrop + * link but before setting afr changelogs, we end up with stale + * xattrop links but zero changelogs. Remove such entries by + * sending a post-op with zero changelogs. 
+ */ + afr_shd_zero_xattrop(healer->this, gfid); - return 0; + return 0; } int -afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid) +afr_shd_index_sweep(struct subvol_healer *healer, char *vgfid) { - loc_t loc = {0}; - afr_private_t *priv = NULL; - int ret = 0; - xlator_t *subvol = NULL; - dict_t *xdata = NULL; - call_frame_t *frame = NULL; - - priv = healer->this->private; - subvol = priv->children[healer->subvol]; - - frame = afr_frame_create (healer->this, &ret); - if (!frame) { - ret = -ret; - goto out; - } - - loc.inode = afr_shd_index_inode (healer->this, subvol, vgfid); - if (!loc.inode) { - gf_msg (healer->this->name, GF_LOG_WARNING, - 0, AFR_MSG_INDEX_DIR_GET_FAILED, - "unable to get index-dir on %s", subvol->name); - ret = -errno; - goto out; - } - - xdata = dict_new (); - if (!xdata || dict_set_int32 (xdata, "get-gfid-type", 1)) { - ret = -ENOMEM; - goto out; - } - - ret = syncop_mt_dir_scan (frame, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, - healer, afr_shd_index_heal, xdata, - priv->shd.max_threads, priv->shd.wait_qlength); - - if (ret == 0) - ret = healer->crawl_event.healed_count; + loc_t loc = {0}; + afr_private_t *priv = NULL; + int ret = 0; + xlator_t *subvol = NULL; + dict_t *xdata = NULL; + call_frame_t *frame = NULL; + + priv = healer->this->private; + subvol = priv->children[healer->subvol]; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + loc.inode = afr_shd_index_inode(healer->this, subvol, vgfid); + if (!loc.inode) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_INDEX_DIR_GET_FAILED, "unable to get index-dir on %s", + subvol->name); + ret = -errno; + goto out; + } + + xdata = dict_new(); + if (!xdata || dict_set_int32(xdata, "get-gfid-type", 1)) { + ret = -ENOMEM; + goto out; + } + + ret = syncop_mt_dir_scan(frame, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, + healer, afr_shd_index_heal, xdata, + priv->shd.max_threads, priv->shd.wait_qlength); + + if (ret == 0) + ret = healer->crawl_event.healed_count; out: - loc_wipe (&loc); + loc_wipe(&loc); - if (xdata) - dict_unref (xdata); - if (frame) - AFR_STACK_DESTROY (frame); - return ret; + if (xdata) + dict_unref(xdata); + if (frame) + AFR_STACK_DESTROY(frame); + return ret; } int -afr_shd_index_sweep_all (struct subvol_healer *healer) +afr_shd_index_sweep_all(struct subvol_healer *healer) { - int ret = 0; - int count = 0; - - ret = afr_shd_index_sweep (healer, GF_XATTROP_INDEX_GFID); - if (ret < 0) - goto out; - count = ret; - - ret = afr_shd_index_sweep (healer, GF_XATTROP_DIRTY_GFID); - if (ret < 0) - goto out; - count += ret; - - ret = afr_shd_index_sweep (healer, GF_XATTROP_ENTRY_CHANGES_GFID); - if (ret < 0) - goto out; - count += ret; + int ret = 0; + int count = 0; + + ret = afr_shd_index_sweep(healer, GF_XATTROP_INDEX_GFID); + if (ret < 0) + goto out; + count = ret; + + ret = afr_shd_index_sweep(healer, GF_XATTROP_DIRTY_GFID); + if (ret < 0) + goto out; + count += ret; + + ret = afr_shd_index_sweep(healer, GF_XATTROP_ENTRY_CHANGES_GFID); + if (ret < 0) + goto out; + count += ret; out: - if (ret < 0) - return ret; - else - return count; + if (ret < 0) + return ret; + else + return count; } int -afr_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, - void *data) +afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) { - struct subvol_healer *healer = data; - xlator_t *this = healer->this; - afr_private_t *priv = NULL; + struct subvol_healer *healer = data; + xlator_t *this = healer->this; + 
afr_private_t *priv = NULL; - priv = this->private; - if (!priv->shd.enabled) - return -EBUSY; + priv = this->private; + if (!priv->shd.enabled) + return -EBUSY; - afr_shd_selfheal_name (healer, healer->subvol, - parent->inode->gfid, entry->d_name); + afr_shd_selfheal_name(healer, healer->subvol, parent->inode->gfid, + entry->d_name); - afr_shd_selfheal (healer, healer->subvol, entry->d_stat.ia_gfid); + afr_shd_selfheal(healer, healer->subvol, entry->d_stat.ia_gfid); - return 0; + return 0; } int -afr_shd_full_sweep (struct subvol_healer *healer, inode_t *inode) +afr_shd_full_sweep(struct subvol_healer *healer, inode_t *inode) { - afr_private_t *priv = NULL; - loc_t loc = {0}; - - priv = healer->this->private; - loc.inode = inode; - return syncop_ftw (priv->children[healer->subvol], &loc, - GF_CLIENT_PID_SELF_HEALD, healer, - afr_shd_full_heal); + afr_private_t *priv = NULL; + loc_t loc = {0}; + + priv = healer->this->private; + loc.inode = inode; + return syncop_ftw(priv->children[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, afr_shd_full_heal); } void -afr_shd_ta_set_xattrs (xlator_t *this, loc_t *loc, dict_t **xdata, - int healer) +afr_shd_ta_set_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer) { - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - struct gf_flock flock = {0, }; - gf_boolean_t need_xattrop = _gf_false; - void *pending_raw = NULL; - int *raw = NULL; - int pending[AFR_NUM_CHANGE_LOGS] = {0,}; - int i = 0; - int j = 0; - int val = 0; - int ret = 0; - - priv = this->private; - - xattr = dict_new (); - if (!xattr) { - goto out; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + struct gf_flock flock = { + 0, + }; + gf_boolean_t need_xattrop = _gf_false; + void *pending_raw = NULL; + int *raw = NULL; + int pending[AFR_NUM_CHANGE_LOGS] = { + 0, + }; + int i = 0; + int j = 0; + int val = 0; + int ret = 0; + + priv = this->private; + + xattr = dict_new(); + if (!xattr) { + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t); + if (!raw) { + goto out; } - for (i = 0; i < priv->child_count; i++) { - raw = GF_CALLOC (AFR_NUM_CHANGE_LOGS, sizeof(int), - gf_afr_mt_int32_t); - if (!raw) { - goto out; - } - - ret = dict_get_ptr (*xdata, priv->pending_key[i], &pending_raw); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - AFR_MSG_DICT_GET_FAILED, "Error getting value " - "of pending key %s", priv->pending_key[i]); - GF_FREE (raw); - goto out; - } - - memcpy (pending, pending_raw, sizeof (pending)); - for (j = 0; j < AFR_NUM_CHANGE_LOGS; j++) { - val = ntoh32 (pending[j]); - if (val) { - if (i == healer) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_THIN_ARB, "I am " - "not the good shd. Skipping. " - "SHD = %d.", healer); - GF_FREE (raw); - goto out; - } - need_xattrop = _gf_true; - raw[j] = hton32 (-val); - } - } + ret = dict_get_ptr(*xdata, priv->pending_key[i], &pending_raw); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED, + "Error getting value " + "of pending key %s", + priv->pending_key[i]); + GF_FREE(raw); + goto out; + } - ret = dict_set_bin (xattr, priv->pending_key[i], raw, - AFR_NUM_CHANGE_LOGS * sizeof (int)); - if (ret) { - GF_FREE (raw); - goto out; + memcpy(pending, pending_raw, sizeof(pending)); + for (j = 0; j < AFR_NUM_CHANGE_LOGS; j++) { + val = ntoh32(pending[j]); + if (val) { + if (i == healer) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_THIN_ARB, + "I am " + "not the good shd. Skipping. 
" + "SHD = %d.", + healer); + GF_FREE(raw); + goto out; } - - memset (pending, 0, sizeof (pending)); + need_xattrop = _gf_true; + raw[j] = hton32(-val); + } } - if (!need_xattrop) { - goto out; + ret = dict_set_bin(xattr, priv->pending_key[i], raw, + AFR_NUM_CHANGE_LOGS * sizeof(int)); + if (ret) { + GF_FREE(raw); + goto out; } - flock.l_type = F_WRLCK; - flock.l_start = 0; - flock.l_len = 0; + memset(pending, 0, sizeof(pending)); + } - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock, - NULL, NULL); - if (ret) - goto out; + if (!need_xattrop) { + goto out; + } - ret = syncop_xattrop (priv->children[THIN_ARBITER_BRICK_INDEX], loc, - GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Xattrop failed."); + flock.l_type = F_WRLCK; + flock.l_start = 0; + flock.l_len = 0; + + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock, NULL, NULL); + if (ret) + goto out; + + ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc, + GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Xattrop failed."); - flock.l_type = F_UNLCK; - syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock, NULL, NULL); + flock.l_type = F_UNLCK; + syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY, + loc, F_SETLKW, &flock, NULL, NULL); out: - if (xattr) - dict_unref (xattr); - return; + if (xattr) + dict_unref(xattr); + return; } void -afr_shd_ta_get_xattrs (xlator_t *this, loc_t *loc, dict_t **xdata) +afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata) { - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - struct iatt stbuf = {0,}; - int *raw = NULL; - int ret = 0; - int i = 0; - - priv = this->private; - - loc->parent = inode_ref (this->itable->root); - gf_uuid_copy (loc->pargfid, loc->parent->gfid); - loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; - loc->inode = inode_new (loc->parent->table); - if (!loc->inode) { - goto out; - } - - ret = syncop_lookup (priv->children[THIN_ARBITER_BRICK_INDEX], loc, - &stbuf, 0, 0, 0); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed lookup on file %s.", loc->name); - goto out; - } - - gf_uuid_copy (priv->ta_gfid, stbuf.ia_gfid); - gf_uuid_copy (loc->gfid, priv->ta_gfid); - - xattr = dict_new (); - if (!xattr) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_GET_FAILED, - "Failed to create dict."); - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - raw = GF_CALLOC (AFR_NUM_CHANGE_LOGS, sizeof(int), - gf_afr_mt_int32_t); - if (!raw) { - goto out; - } - - ret = dict_set_bin (xattr, priv->pending_key[i], raw, - AFR_NUM_CHANGE_LOGS * sizeof (int)); - if (ret) { - GF_FREE (raw); - goto out; - } + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + struct iatt stbuf = { + 0, + }; + int *raw = NULL; + int ret = 0; + int i = 0; + + priv = this->private; + + loc->parent = inode_ref(this->itable->root); + gf_uuid_copy(loc->pargfid, loc->parent->gfid); + loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; + loc->inode = inode_new(loc->parent->table); + if (!loc->inode) { + goto out; + } + + ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], loc, &stbuf, + 0, 0, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed lookup on file %s.", loc->name); 
+ goto out; + } + + gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid); + gf_uuid_copy(loc->gfid, priv->ta_gfid); + + xattr = dict_new(); + if (!xattr) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_GET_FAILED, + "Failed to create dict."); + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t); + if (!raw) { + goto out; } - ret = syncop_xattrop (priv->children[THIN_ARBITER_BRICK_INDEX], - loc, GF_XATTROP_ADD_ARRAY, xattr, NULL, xdata, - NULL); + ret = dict_set_bin(xattr, priv->pending_key[i], raw, + AFR_NUM_CHANGE_LOGS * sizeof(int)); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Xattrop failed."); - goto out; + GF_FREE(raw); + goto out; } - if (!(*xdata)) - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_GET_FAILED, - "Xdata response is empty."); + } + + ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc, + GF_XATTROP_ADD_ARRAY, xattr, NULL, xdata, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Xattrop failed."); + goto out; + } + if (!(*xdata)) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_GET_FAILED, + "Xdata response is empty."); out: - if (xattr) - dict_unref (xattr); - return; + if (xattr) + dict_unref(xattr); + return; } void * -afr_shd_index_healer (void *data) +afr_shd_index_healer(void *data) { - struct subvol_healer *healer = NULL; - xlator_t *this = NULL; - int ret = 0; - afr_private_t *priv = NULL; - dict_t *xdata = NULL; - loc_t loc = {0, }; - - healer = data; - THIS = this = healer->this; - priv = this->private; - - for (;;) { - afr_shd_healer_wait (healer); - - ASSERT_LOCAL(this, healer); - priv->local[healer->subvol] = healer->local; - - if (priv->thin_arbiter_count) { - loc_wipe (&loc); - afr_shd_ta_get_xattrs (this, &loc, &xdata); - } - + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + int ret = 0; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + loc_t loc = { + 0, + }; + + healer = data; + THIS = this = healer->this; + priv = this->private; + + for (;;) { + afr_shd_healer_wait(healer); + + ASSERT_LOCAL(this, healer); + priv->local[healer->subvol] = healer->local; + + if (priv->thin_arbiter_count) { + loc_wipe(&loc); + afr_shd_ta_get_xattrs(this, &loc, &xdata); + } - do { - gf_msg_debug (this->name, 0, - "starting index sweep on subvol %s", - afr_subvol_name (this, healer->subvol)); - - afr_shd_sweep_prepare (healer); - - ret = afr_shd_index_sweep_all (healer); - - afr_shd_sweep_done (healer); - /* - As long as at least one gfid was - healed, keep retrying. We may have - just healed a directory and thereby - created entries for other gfids which - could not be healed thus far. - */ - - gf_msg_debug (this->name, 0, - "finished index sweep on subvol %s", - afr_subvol_name (this, healer->subvol)); - /* - Give a pause before retrying to avoid a busy loop - in case the only entry in index is because of - an ongoing I/O. - */ - sleep (1); - } while (ret > 0); - - if (xdata && !healer->crawl_event.heal_failed_count) { - afr_shd_ta_set_xattrs (this, &loc, &xdata, - healer->subvol); - dict_unref (xdata); - xdata = NULL; - } - } + do { + gf_msg_debug(this->name, 0, "starting index sweep on subvol %s", + afr_subvol_name(this, healer->subvol)); + + afr_shd_sweep_prepare(healer); + + ret = afr_shd_index_sweep_all(healer); + + afr_shd_sweep_done(healer); + /* + As long as at least one gfid was + healed, keep retrying. 
We may have + just healed a directory and thereby + created entries for other gfids which + could not be healed thus far. + */ + + gf_msg_debug(this->name, 0, "finished index sweep on subvol %s", + afr_subvol_name(this, healer->subvol)); + /* + Give a pause before retrying to avoid a busy loop + in case the only entry in index is because of + an ongoing I/O. + */ + sleep(1); + } while (ret > 0); + + if (xdata && !healer->crawl_event.heal_failed_count) { + afr_shd_ta_set_xattrs(this, &loc, &xdata, healer->subvol); + dict_unref(xdata); + xdata = NULL; + } + } - loc_wipe (&loc); + loc_wipe(&loc); - return NULL; + return NULL; } - void * -afr_shd_full_healer (void *data) +afr_shd_full_healer(void *data) { - struct subvol_healer *healer = NULL; - xlator_t *this = NULL; - int run = 0; + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + int run = 0; - healer = data; - THIS = this = healer->this; + healer = data; + THIS = this = healer->this; - for (;;) { - pthread_mutex_lock (&healer->mutex); - { - run = __afr_shd_healer_wait (healer); - if (!run) - healer->running = _gf_false; - } - pthread_mutex_unlock (&healer->mutex); + for (;;) { + pthread_mutex_lock(&healer->mutex); + { + run = __afr_shd_healer_wait(healer); + if (!run) + healer->running = _gf_false; + } + pthread_mutex_unlock(&healer->mutex); - if (!run) - break; + if (!run) + break; - ASSERT_LOCAL(this, healer); + ASSERT_LOCAL(this, healer); - gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, - "starting full sweep on subvol %s", - afr_subvol_name (this, healer->subvol)); + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, + "starting full sweep on subvol %s", + afr_subvol_name(this, healer->subvol)); - afr_shd_sweep_prepare (healer); + afr_shd_sweep_prepare(healer); - afr_shd_full_sweep (healer, this->itable->root); + afr_shd_full_sweep(healer, this->itable->root); - afr_shd_sweep_done (healer); + afr_shd_sweep_done(healer); - gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, - "finished full sweep on subvol %s", - afr_subvol_name (this, healer->subvol)); - } + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, + "finished full sweep on subvol %s", + afr_subvol_name(this, healer->subvol)); + } - return NULL; + return NULL; } - int -afr_shd_healer_init (xlator_t *this, struct subvol_healer *healer) +afr_shd_healer_init(xlator_t *this, struct subvol_healer *healer) { - int ret = 0; + int ret = 0; - ret = pthread_mutex_init (&healer->mutex, NULL); - if (ret) - goto out; + ret = pthread_mutex_init(&healer->mutex, NULL); + if (ret) + goto out; - ret = pthread_cond_init (&healer->cond, NULL); - if (ret) - goto out; + ret = pthread_cond_init(&healer->cond, NULL); + if (ret) + goto out; - healer->this = this; - healer->running = _gf_false; - healer->rerun = _gf_false; - healer->local = _gf_false; + healer->this = this; + healer->running = _gf_false; + healer->rerun = _gf_false; + healer->local = _gf_false; out: - return ret; + return ret; } - int -afr_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer, - void *(threadfn)(void *)) +afr_shd_healer_spawn(xlator_t *this, struct subvol_healer *healer, + void *(threadfn)(void *)) { - int ret = 0; - - pthread_mutex_lock (&healer->mutex); - { - if (healer->running) { - pthread_cond_signal (&healer->cond); - } else { - ret = gf_thread_create (&healer->thread, NULL, - threadfn, healer, "shdheal"); - if (ret) - goto unlock; - healer->running = 1; - } - - healer->rerun = 1; - } + int ret = 0; + + pthread_mutex_lock(&healer->mutex); + { + if 
(healer->running) { + pthread_cond_signal(&healer->cond); + } else { + ret = gf_thread_create(&healer->thread, NULL, threadfn, healer, + "shdheal"); + if (ret) + goto unlock; + healer->running = 1; + } + + healer->rerun = 1; + } unlock: - pthread_mutex_unlock (&healer->mutex); + pthread_mutex_unlock(&healer->mutex); - return ret; + return ret; } - int -afr_shd_full_healer_spawn (xlator_t *this, int subvol) +afr_shd_full_healer_spawn(xlator_t *this, int subvol) { - return afr_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol), - afr_shd_full_healer); + return afr_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol), + afr_shd_full_healer); } - int -afr_shd_index_healer_spawn (xlator_t *this, int subvol) +afr_shd_index_healer_spawn(xlator_t *this, int subvol) { - return afr_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol), - afr_shd_index_healer); + return afr_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol), + afr_shd_index_healer); } - int -afr_shd_dict_add_crawl_event (xlator_t *this, dict_t *output, - crawl_event_t *crawl_event) +afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output, + crawl_event_t *crawl_event) { - int ret = 0; - uint64_t count = 0; - char key[256] = {0}; - int xl_id = 0; - uint64_t healed_count = 0; - uint64_t split_brain_count = 0; - uint64_t heal_failed_count = 0; - char *start_time_str = 0; - char *end_time_str = NULL; - char *crawl_type = NULL; - int progress = -1; - int child = -1; - - child = crawl_event->child; - healed_count = crawl_event->healed_count; - split_brain_count = crawl_event->split_brain_count; - heal_failed_count = crawl_event->heal_failed_count; - crawl_type = crawl_event->crawl_type; - - if (!crawl_event->start_time) - goto out; - - start_time_str = gf_strdup (ctime (&crawl_event->start_time)); - - if (crawl_event->end_time) - end_time_str = gf_strdup (ctime (&crawl_event->end_time)); - - ret = dict_get_int32 (output, this->name, &xl_id); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - AFR_MSG_DICT_GET_FAILED, "xl does not have id"); - goto out; - } - - snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child); - ret = dict_get_uint64 (output, key, &count); - - - snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_uint64(output, key, healed_count); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not add statistics_healed_count to output"); - goto out; - } - - snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_uint64 (output, key, split_brain_count); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not add statistics_split_brain_count to output"); - goto out; - } - - snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_str (output, key, crawl_type); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not add statistics_crawl_type to output"); - goto out; - } - - snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_uint64 (output, key, heal_failed_count); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not add statistics_healed_failed_count to output"); - goto out; - } - - snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_dynstr (output, key, start_time_str); - 
if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not add statistics_crawl_start_time to output"); - goto out; - } else { - start_time_str = NULL; - } - - if (!end_time_str) - progress = 1; - else - progress = 0; - - snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64, - xl_id, child, count); - if (!end_time_str) - end_time_str = gf_strdup ("Could not determine the end time"); - ret = dict_set_dynstr (output, key, end_time_str); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not add statistics_crawl_end_time to output"); - goto out; - } else { - end_time_str = NULL; - } - - snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64, - xl_id, child, count); - - ret = dict_set_int32 (output, key, progress); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not add statistics_inprogress to output"); - goto out; - } - - snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child); - ret = dict_set_uint64 (output, key, count + 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not increment the counter."); - goto out; - } + int ret = 0; + uint64_t count = 0; + char key[256] = {0}; + int xl_id = 0; + uint64_t healed_count = 0; + uint64_t split_brain_count = 0; + uint64_t heal_failed_count = 0; + char *start_time_str = 0; + char *end_time_str = NULL; + char *crawl_type = NULL; + int progress = -1; + int child = -1; + + child = crawl_event->child; + healed_count = crawl_event->healed_count; + split_brain_count = crawl_event->split_brain_count; + heal_failed_count = crawl_event->heal_failed_count; + crawl_type = crawl_event->crawl_type; + + if (!crawl_event->start_time) + goto out; + + start_time_str = gf_strdup(ctime(&crawl_event->start_time)); + + if (crawl_event->end_time) + end_time_str = gf_strdup(ctime(&crawl_event->end_time)); + + ret = dict_get_int32(output, this->name, &xl_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED, + "xl does not have id"); + goto out; + } + + snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child); + ret = dict_get_uint64(output, key, &count); + + snprintf(key, sizeof(key), "statistics_healed_cnt-%d-%d-%" PRIu64, xl_id, + child, count); + ret = dict_set_uint64(output, key, healed_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not add statistics_healed_count to output"); + goto out; + } + + snprintf(key, sizeof(key), "statistics_sb_cnt-%d-%d-%" PRIu64, xl_id, child, + count); + ret = dict_set_uint64(output, key, split_brain_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not add statistics_split_brain_count to output"); + goto out; + } + + snprintf(key, sizeof(key), "statistics_crawl_type-%d-%d-%" PRIu64, xl_id, + child, count); + ret = dict_set_str(output, key, crawl_type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not add statistics_crawl_type to output"); + goto out; + } + + snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%d-%d-%" PRIu64, + xl_id, child, count); + ret = dict_set_uint64(output, key, heal_failed_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not add statistics_healed_failed_count to output"); + goto out; + } + + snprintf(key, sizeof(key), "statistics_strt_time-%d-%d-%" PRIu64, xl_id, + child, count); + ret = 
dict_set_dynstr(output, key, start_time_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not add statistics_crawl_start_time to output"); + goto out; + } else { + start_time_str = NULL; + } + + if (!end_time_str) + progress = 1; + else + progress = 0; + + snprintf(key, sizeof(key), "statistics_end_time-%d-%d-%" PRIu64, xl_id, + child, count); + if (!end_time_str) + end_time_str = gf_strdup("Could not determine the end time"); + ret = dict_set_dynstr(output, key, end_time_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not add statistics_crawl_end_time to output"); + goto out; + } else { + end_time_str = NULL; + } + + snprintf(key, sizeof(key), "statistics_inprogress-%d-%d-%" PRIu64, xl_id, + child, count); + + ret = dict_set_int32(output, key, progress); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not add statistics_inprogress to output"); + goto out; + } + + snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child); + ret = dict_set_uint64(output, key, count + 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not increment the counter."); + goto out; + } out: - GF_FREE (start_time_str); - GF_FREE (end_time_str); - return ret; + GF_FREE(start_time_str); + GF_FREE(end_time_str); + return ret; } - int -afr_shd_dict_add_path (xlator_t *this, dict_t *output, int child, char *path, - struct timeval *tv) +afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path, + struct timeval *tv) { - int ret = -1; - uint64_t count = 0; - char key[256] = {0}; - int xl_id = 0; - - ret = dict_get_int32 (output, this->name, &xl_id); + int ret = -1; + uint64_t count = 0; + char key[256] = {0}; + int xl_id = 0; + + ret = dict_get_int32(output, this->name, &xl_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED, + "xl does not have id"); + goto out; + } + + snprintf(key, sizeof(key), "%d-%d-count", xl_id, child); + ret = dict_get_uint64(output, key, &count); + + snprintf(key, sizeof(key), "%d-%d-%" PRIu64, xl_id, child, count); + ret = dict_set_dynstr(output, key, path); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "%s: Could not add to output", path); + goto out; + } + + if (tv) { + snprintf(key, sizeof(key), "%d-%d-%" PRIu64 "-time", xl_id, child, + count); + ret = dict_set_uint32(output, key, tv->tv_sec); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - AFR_MSG_DICT_GET_FAILED, "xl does not have id"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "%s: Could not set time", path); + goto out; } + } - snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); - ret = dict_get_uint64 (output, key, &count); + snprintf(key, sizeof(key), "%d-%d-count", xl_id, child); - snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count); - ret = dict_set_dynstr (output, key, path); + ret = dict_set_uint64(output, key, count + 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, + "Could not increment count"); + goto out; + } - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - AFR_MSG_DICT_SET_FAILED, "%s: Could not add to output", - path); - goto out; - } - - if (tv) { - snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id, - child, count); - ret = dict_set_uint32 (output, key, tv->tv_sec); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - 
"%s: Could not set time", - path); - goto out; - } - } - - snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); - - ret = dict_set_uint64 (output, key, count + 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - -ret, AFR_MSG_DICT_SET_FAILED, - "Could not increment count"); - goto out; - } - - ret = 0; + ret = 0; out: - return ret; + return ret; } int -afr_add_shd_event (circular_buffer_t *cb, void *data) +afr_add_shd_event(circular_buffer_t *cb, void *data) { - dict_t *output = NULL; - xlator_t *this = THIS; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - shd_event_t *shd_event = NULL; - char *path = NULL; - - output = data; - priv = this->private; - shd = &priv->shd; - shd_event = cb->data; - - if (!shd->index_healers[shd_event->child].local) - return 0; - - path = gf_strdup (shd_event->path); - if (!path) - return -ENOMEM; - - afr_shd_dict_add_path (this, output, shd_event->child, path, - &cb->tv); - return 0; + dict_t *output = NULL; + xlator_t *this = THIS; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + shd_event_t *shd_event = NULL; + char *path = NULL; + + output = data; + priv = this->private; + shd = &priv->shd; + shd_event = cb->data; + + if (!shd->index_healers[shd_event->child].local) + return 0; + + path = gf_strdup(shd_event->path); + if (!path) + return -ENOMEM; + + afr_shd_dict_add_path(this, output, shd_event->child, path, &cb->tv); + return 0; } int -afr_add_crawl_event (circular_buffer_t *cb, void *data) +afr_add_crawl_event(circular_buffer_t *cb, void *data) { - dict_t *output = NULL; - xlator_t *this = THIS; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - crawl_event_t *crawl_event = NULL; - - output = data; - priv = this->private; - shd = &priv->shd; - crawl_event = cb->data; - - if (!shd->index_healers[crawl_event->child].local) - return 0; + dict_t *output = NULL; + xlator_t *this = THIS; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + crawl_event_t *crawl_event = NULL; + + output = data; + priv = this->private; + shd = &priv->shd; + crawl_event = cb->data; + + if (!shd->index_healers[crawl_event->child].local) + return 0; - afr_shd_dict_add_crawl_event (this, output, crawl_event); + afr_shd_dict_add_crawl_event(this, output, crawl_event); - return 0; + return 0; } - int -afr_selfheal_daemon_init (xlator_t *this) +afr_selfheal_daemon_init(xlator_t *this) { - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int ret = -1; - int i = 0; - - priv = this->private; - shd = &priv->shd; - - this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); - if (!this->itable) - goto out; - - shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers), - priv->child_count, - gf_afr_mt_subvol_healer_t); - if (!shd->index_healers) - goto out; - - for (i = 0; i < priv->child_count; i++) { - shd->index_healers[i].subvol = i; - ret = afr_shd_healer_init (this, &shd->index_healers[i]); - if (ret) - goto out; - } - - shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers), - priv->child_count, - gf_afr_mt_subvol_healer_t); - if (!shd->full_healers) - goto out; - for (i = 0; i < priv->child_count; i++) { - shd->full_healers[i].subvol = i; - ret = afr_shd_healer_init (this, &shd->full_healers[i]); - if (ret) - goto out; - } - - shd->split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false, - afr_destroy_shd_event_data); - if (!shd->split_brain) - goto out; - - shd->statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count, - gf_common_mt_eh_t); - if (!shd->statistics) - goto out; - - for (i = 0; i < 
priv->child_count ; i++) { - shd->statistics[i] = eh_new (AFR_STATISTICS_HISTORY_SIZE, - _gf_false, - afr_destroy_crawl_event_data); - if (!shd->statistics[i]) - goto out; - shd->full_healers[i].crawl_event.child = i; - shd->full_healers[i].crawl_event.crawl_type = "FULL"; - shd->index_healers[i].crawl_event.child = i; - shd->index_healers[i].crawl_event.crawl_type = "INDEX"; - } - - ret = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = -1; + int i = 0; + + priv = this->private; + shd = &priv->shd; + + this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this); + if (!this->itable) + goto out; + + shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers), + priv->child_count, + gf_afr_mt_subvol_healer_t); + if (!shd->index_healers) + goto out; + + for (i = 0; i < priv->child_count; i++) { + shd->index_healers[i].subvol = i; + ret = afr_shd_healer_init(this, &shd->index_healers[i]); + if (ret) + goto out; + } + + shd->full_healers = GF_CALLOC(sizeof(*shd->full_healers), priv->child_count, + gf_afr_mt_subvol_healer_t); + if (!shd->full_healers) + goto out; + for (i = 0; i < priv->child_count; i++) { + shd->full_healers[i].subvol = i; + ret = afr_shd_healer_init(this, &shd->full_healers[i]); + if (ret) + goto out; + } + + shd->split_brain = eh_new(AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false, + afr_destroy_shd_event_data); + if (!shd->split_brain) + goto out; + + shd->statistics = GF_CALLOC(sizeof(eh_t *), priv->child_count, + gf_common_mt_eh_t); + if (!shd->statistics) + goto out; + + for (i = 0; i < priv->child_count; i++) { + shd->statistics[i] = eh_new(AFR_STATISTICS_HISTORY_SIZE, _gf_false, + afr_destroy_crawl_event_data); + if (!shd->statistics[i]) + goto out; + shd->full_healers[i].crawl_event.child = i; + shd->full_healers[i].crawl_event.crawl_type = "FULL"; + shd->index_healers[i].crawl_event.child = i; + shd->index_healers[i].crawl_event.crawl_type = "INDEX"; + } + + ret = 0; out: - return ret; + return ret; } - int -afr_selfheal_childup (xlator_t *this, int subvol) +afr_selfheal_childup(xlator_t *this, int subvol) { - afr_shd_index_healer_spawn (this, subvol); + afr_shd_index_healer_spawn(this, subvol); - return 0; + return 0; } - int -afr_shd_get_index_count (xlator_t *this, int i, uint64_t *count) +afr_shd_get_index_count(xlator_t *this, int i, uint64_t *count) { - afr_private_t *priv = NULL; - xlator_t *subvol = NULL; - loc_t rootloc = {0, }; - dict_t *xattr = NULL; - int ret = -1; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + loc_t rootloc = { + 0, + }; + dict_t *xattr = NULL; + int ret = -1; - priv = this->private; - subvol = priv->children[i]; + priv = this->private; + subvol = priv->children[i]; - rootloc.inode = inode_ref (this->itable->root); - gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid); + rootloc.inode = inode_ref(this->itable->root); + gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid); - ret = syncop_getxattr (subvol, &rootloc, &xattr, - GF_XATTROP_INDEX_COUNT, NULL, NULL); - if (ret < 0) - goto out; + ret = syncop_getxattr(subvol, &rootloc, &xattr, GF_XATTROP_INDEX_COUNT, + NULL, NULL); + if (ret < 0) + goto out; - ret = dict_get_uint64 (xattr, GF_XATTROP_INDEX_COUNT, count); - if (ret) - goto out; + ret = dict_get_uint64(xattr, GF_XATTROP_INDEX_COUNT, count); + if (ret) + goto out; - ret = 0; + ret = 0; out: - if (xattr) - dict_unref (xattr); - loc_wipe (&rootloc); + if (xattr) + dict_unref(xattr); + loc_wipe(&rootloc); - return ret; + return ret; } - int -afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) +afr_xl_op(xlator_t 
*this, dict_t *input, dict_t *output) { - gf_xl_afr_op_t op = GF_SHD_OP_INVALID; - int ret = 0; - int xl_id = 0; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - struct subvol_healer *healer = NULL; - int i = 0; - char key[64]; - int op_ret = 0; - uint64_t cnt = 0; - - priv = this->private; - shd = &priv->shd; - - ret = dict_get_int32 (input, "xl-op", (int32_t*)&op); - if (ret) - goto out; - ret = dict_get_int32 (input, this->name, &xl_id); - if (ret) - goto out; - ret = dict_set_int32 (output, this->name, xl_id); - if (ret) - goto out; - switch (op) { + gf_xl_afr_op_t op = GF_SHD_OP_INVALID; + int ret = 0; + int xl_id = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + struct subvol_healer *healer = NULL; + int i = 0; + char key[64]; + int op_ret = 0; + uint64_t cnt = 0; + + priv = this->private; + shd = &priv->shd; + + ret = dict_get_int32(input, "xl-op", (int32_t *)&op); + if (ret) + goto out; + ret = dict_get_int32(input, this->name, &xl_id); + if (ret) + goto out; + ret = dict_set_int32(output, this->name, xl_id); + if (ret) + goto out; + switch (op) { case GF_SHD_OP_HEAL_INDEX: - op_ret = 0; - - for (i = 0; i < priv->child_count; i++) { - healer = &shd->index_healers[i]; - snprintf (key, sizeof (key), "%d-%d-status", xl_id, i); - - if (!priv->child_up[i]) { - ret = dict_set_str (output, key, - "Brick is not connected"); - op_ret = -1; - } else if (AFR_COUNT (priv->child_up, - priv->child_count) < 2) { - ret = dict_set_str (output, key, - "< 2 bricks in replica are up"); - op_ret = -1; - } else if (!afr_shd_is_subvol_local (this, healer->subvol)) { - ret = dict_set_str (output, key, - "Brick is remote"); - } else { - ret = dict_set_str (output, key, - "Started self-heal"); - afr_shd_index_healer_spawn (this, i); - } - } - break; + op_ret = 0; + + for (i = 0; i < priv->child_count; i++) { + healer = &shd->index_healers[i]; + snprintf(key, sizeof(key), "%d-%d-status", xl_id, i); + + if (!priv->child_up[i]) { + ret = dict_set_str(output, key, "Brick is not connected"); + op_ret = -1; + } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) { + ret = dict_set_str(output, key, + "< 2 bricks in replica are up"); + op_ret = -1; + } else if (!afr_shd_is_subvol_local(this, healer->subvol)) { + ret = dict_set_str(output, key, "Brick is remote"); + } else { + ret = dict_set_str(output, key, "Started self-heal"); + afr_shd_index_healer_spawn(this, i); + } + } + break; case GF_SHD_OP_HEAL_FULL: - op_ret = -1; - - for (i = 0; i < priv->child_count; i++) { - healer = &shd->full_healers[i]; - snprintf (key, sizeof (key), "%d-%d-status", xl_id, i); - - if (!priv->child_up[i]) { - ret = dict_set_str (output, key, - "Brick is not connected"); - } else if (AFR_COUNT (priv->child_up, - priv->child_count) < 2) { - ret = dict_set_str (output, key, - "< 2 bricks in replica are up"); - } else if (!afr_shd_is_subvol_local (this, healer->subvol)) { - ret = dict_set_str (output, key, - "Brick is remote"); - } else { - ret = dict_set_str (output, key, - "Started self-heal"); - afr_shd_full_healer_spawn (this, i); - op_ret = 0; - } - } - break; + op_ret = -1; + + for (i = 0; i < priv->child_count; i++) { + healer = &shd->full_healers[i]; + snprintf(key, sizeof(key), "%d-%d-status", xl_id, i); + + if (!priv->child_up[i]) { + ret = dict_set_str(output, key, "Brick is not connected"); + } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) { + ret = dict_set_str(output, key, + "< 2 bricks in replica are up"); + } else if (!afr_shd_is_subvol_local(this, healer->subvol)) { + 
ret = dict_set_str(output, key, "Brick is remote"); + } else { + ret = dict_set_str(output, key, "Started self-heal"); + afr_shd_full_healer_spawn(this, i); + op_ret = 0; + } + } + break; case GF_SHD_OP_INDEX_SUMMARY: - /* this case has been handled in glfs-heal.c */ - break; + /* this case has been handled in glfs-heal.c */ + break; case GF_SHD_OP_HEALED_FILES: case GF_SHD_OP_HEAL_FAILED_FILES: - for (i = 0; i < priv->child_count; i++) { - snprintf (key, sizeof (key), "%d-%d-status", xl_id, i); - ret = dict_set_str (output, key, "Operation Not " - "Supported"); - } - break; + for (i = 0; i < priv->child_count; i++) { + snprintf(key, sizeof(key), "%d-%d-status", xl_id, i); + ret = dict_set_str(output, key, + "Operation Not " + "Supported"); + } + break; case GF_SHD_OP_SPLIT_BRAIN_FILES: - eh_dump (shd->split_brain, output, afr_add_shd_event); - break; + eh_dump(shd->split_brain, output, afr_add_shd_event); + break; case GF_SHD_OP_STATISTICS: - for (i = 0; i < priv->child_count; i++) { - eh_dump (shd->statistics[i], output, - afr_add_crawl_event); - afr_shd_dict_add_crawl_event (this, output, - &shd->index_healers[i].crawl_event); - afr_shd_dict_add_crawl_event (this, output, - &shd->full_healers[i].crawl_event); - } - break; + for (i = 0; i < priv->child_count; i++) { + eh_dump(shd->statistics[i], output, afr_add_crawl_event); + afr_shd_dict_add_crawl_event( + this, output, &shd->index_healers[i].crawl_event); + afr_shd_dict_add_crawl_event(this, output, + &shd->full_healers[i].crawl_event); + } + break; case GF_SHD_OP_STATISTICS_HEAL_COUNT: case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: - op_ret = -1; - - for (i = 0; i < priv->child_count; i++) { - if (!priv->child_up[i]) { - snprintf (key, sizeof (key), "%d-%d-status", - xl_id, i); - ret = dict_set_str (output, key, - "Brick is not connected"); - } else { - snprintf (key, sizeof (key), "%d-%d-hardlinks", - xl_id, i); - ret = afr_shd_get_index_count (this, i, &cnt); - if (ret == 0) { - ret = dict_set_uint64 (output, key, cnt); - } - op_ret = 0; - } - } - - break; + op_ret = -1; + + for (i = 0; i < priv->child_count; i++) { + if (!priv->child_up[i]) { + snprintf(key, sizeof(key), "%d-%d-status", xl_id, i); + ret = dict_set_str(output, key, "Brick is not connected"); + } else { + snprintf(key, sizeof(key), "%d-%d-hardlinks", xl_id, i); + ret = afr_shd_get_index_count(this, i, &cnt); + if (ret == 0) { + ret = dict_set_uint64(output, key, cnt); + } + op_ret = 0; + } + } + + break; default: - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INVALID_ARG, "Unknown set op %d", op); - break; - } + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, + "Unknown set op %d", op); + break; + } out: - dict_del (output, this->name); - return op_ret; + dict_del(output, this->name); + return op_ret; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 9c587db0562..3a542ceca43 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -21,57 +21,57 @@ #include typedef enum { - AFR_TRANSACTION_PRE_OP, - AFR_TRANSACTION_POST_OP, + AFR_TRANSACTION_PRE_OP, + AFR_TRANSACTION_POST_OP, } afr_xattrop_type_t; static void -afr_lock_resume_shared (struct list_head *list); +afr_lock_resume_shared(struct list_head *list); void -__afr_transaction_wake_shared (afr_local_t *local, struct list_head *shared); +__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared); void -afr_changelog_post_op (call_frame_t *frame, xlator_t *this); 
+afr_changelog_post_op(call_frame_t *frame, xlator_t *this); int -afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this); +afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this); gf_boolean_t -afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this); +afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this); gf_boolean_t -afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this); +afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this); int -afr_changelog_call_count (afr_transaction_type type, - unsigned char *pre_op_subvols, - unsigned char *failed_subvols, - unsigned int child_count); +afr_changelog_call_count(afr_transaction_type type, + unsigned char *pre_op_subvols, + unsigned char *failed_subvols, + unsigned int child_count); int -afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, - afr_changelog_resume_t changelog_resume, - afr_xattrop_type_t op); +afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr, + afr_changelog_resume_t changelog_resume, + afr_xattrop_type_t op); void -afr_zero_fill_stat (afr_local_t *local) -{ - if (!local) - return; - if (local->transaction.type == AFR_DATA_TRANSACTION || - local->transaction.type == AFR_METADATA_TRANSACTION) { - gf_zero_fill_stat (&local->cont.inode_wfop.prebuf); - gf_zero_fill_stat (&local->cont.inode_wfop.postbuf); - } else if (local->transaction.type == AFR_ENTRY_TRANSACTION || - local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { - gf_zero_fill_stat (&local->cont.dir_fop.buf); - gf_zero_fill_stat (&local->cont.dir_fop.preparent); - gf_zero_fill_stat (&local->cont.dir_fop.postparent); - if (local->transaction.type == AFR_ENTRY_TRANSACTION) - return; - gf_zero_fill_stat (&local->cont.dir_fop.prenewparent); - gf_zero_fill_stat (&local->cont.dir_fop.postnewparent); - } +afr_zero_fill_stat(afr_local_t *local) +{ + if (!local) + return; + if (local->transaction.type == AFR_DATA_TRANSACTION || + local->transaction.type == AFR_METADATA_TRANSACTION) { + gf_zero_fill_stat(&local->cont.inode_wfop.prebuf); + gf_zero_fill_stat(&local->cont.inode_wfop.postbuf); + } else if (local->transaction.type == AFR_ENTRY_TRANSACTION || + local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + gf_zero_fill_stat(&local->cont.dir_fop.buf); + gf_zero_fill_stat(&local->cont.dir_fop.preparent); + gf_zero_fill_stat(&local->cont.dir_fop.postparent); + if (local->transaction.type == AFR_ENTRY_TRANSACTION) + return; + gf_zero_fill_stat(&local->cont.dir_fop.prenewparent); + gf_zero_fill_stat(&local->cont.dir_fop.postnewparent); + } } /* In case of errors afr needs to choose which xdata from lower xlators it needs @@ -79,2483 +79,2402 @@ afr_zero_fill_stat (afr_local_t *local) * any good subvols which failed. 
Give preference to errnos other than * ENOTCONN even if the child is source */ void -afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv, - inode_t *inode1, unsigned char *readable1, - inode_t *inode2, unsigned char *readable2) -{ - int s = -1;/*selection*/ - int i = 0; - unsigned char *readable = NULL; - - if (local->xdata_rsp) { - dict_unref (local->xdata_rsp); - local->xdata_rsp = NULL; - } - - readable = alloca0 (priv->child_count * sizeof (*readable)); - if (inode2 && readable2) {/*rename fop*/ - AFR_INTERSECT (readable, readable1, readable2, - priv->child_count); - } else { - memcpy (readable, readable1, - sizeof (*readable) * priv->child_count); - } - +afr_pick_error_xdata(afr_local_t *local, afr_private_t *priv, inode_t *inode1, + unsigned char *readable1, inode_t *inode2, + unsigned char *readable2) +{ + int s = -1; /*selection*/ + int i = 0; + unsigned char *readable = NULL; + + if (local->xdata_rsp) { + dict_unref(local->xdata_rsp); + local->xdata_rsp = NULL; + } + + readable = alloca0(priv->child_count * sizeof(*readable)); + if (inode2 && readable2) { /*rename fop*/ + AFR_INTERSECT(readable, readable1, readable2, priv->child_count); + } else { + memcpy(readable, readable1, sizeof(*readable) * priv->child_count); + } + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + + if (local->replies[i].op_ret >= 0) + continue; + + if (local->replies[i].op_errno == ENOTCONN) + continue; + + /*Order is important in the following condition*/ + if ((s < 0) || (!readable[s] && readable[i])) + s = i; + } + + if (s != -1 && local->replies[s].xdata) { + local->xdata_rsp = dict_ref(local->replies[s].xdata); + } else if (s == -1) { for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - - if (local->replies[i].op_ret >= 0) - continue; + if (!local->replies[i].valid) + continue; - if (local->replies[i].op_errno == ENOTCONN) - continue; + if (local->replies[i].op_ret >= 0) + continue; - /*Order is important in the following condition*/ - if ((s < 0) || (!readable[s] && readable[i])) - s = i; - } - - if (s != -1 && local->replies[s].xdata) { - local->xdata_rsp = dict_ref (local->replies[s].xdata); - } else if (s == -1) { - for (i = 0; i < priv->child_count; i++) { - if (!local->replies[i].valid) - continue; - - if (local->replies[i].op_ret >= 0) - continue; - - if (!local->replies[i].xdata) - continue; - local->xdata_rsp = dict_ref (local->replies[i].xdata); - break; - } + if (!local->replies[i].xdata) + continue; + local->xdata_rsp = dict_ref(local->replies[i].xdata); + break; } + } } gf_boolean_t -afr_needs_changelog_update (afr_local_t *local) +afr_needs_changelog_update(afr_local_t *local) { - if (local->transaction.type == AFR_DATA_TRANSACTION) - return _gf_true; - if (!local->optimistic_change_log) - return _gf_true; - return _gf_false; + if (local->transaction.type == AFR_DATA_TRANSACTION) + return _gf_true; + if (!local->optimistic_change_log) + return _gf_true; + return _gf_false; } gf_boolean_t -afr_changelog_has_quorum (afr_local_t *local, xlator_t *this) +afr_changelog_has_quorum(afr_local_t *local, xlator_t *this) { - afr_private_t *priv = NULL; - int i = 0; - unsigned char *success_children = NULL; + afr_private_t *priv = NULL; + int i = 0; + unsigned char *success_children = NULL; - priv = this->private; - success_children = alloca0 (priv->child_count); + priv = this->private; + success_children = alloca0(priv->child_count); - for (i = 0; i < priv->child_count; i++) { - if 
(!local->transaction.failed_subvols[i]) { - success_children[i] = 1; - } + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.failed_subvols[i]) { + success_children[i] = 1; } + } - if (afr_has_quorum (success_children, this)) { - return _gf_true; - } + if (afr_has_quorum(success_children, this)) { + return _gf_true; + } - return _gf_false; + return _gf_false; } - gf_boolean_t -afr_is_write_subvol_valid (call_frame_t *frame, xlator_t *this) +afr_is_write_subvol_valid(call_frame_t *frame, xlator_t *this) { - int i = 0; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - uint64_t write_subvol = 0; - unsigned char *writable = NULL; - uint16_t datamap = 0; + int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + uint64_t write_subvol = 0; + unsigned char *writable = NULL; + uint16_t datamap = 0; - local = frame->local; - priv = this->private; - writable = alloca0 (priv->child_count); + local = frame->local; + priv = this->private; + writable = alloca0(priv->child_count); - write_subvol = afr_write_subvol_get (frame, this); - datamap = (write_subvol & 0x00000000ffff0000) >> 16; - for (i = 0; i < priv->child_count; i++) { - if (datamap & (1 << i)) - writable[i] = 1; + write_subvol = afr_write_subvol_get(frame, this); + datamap = (write_subvol & 0x00000000ffff0000) >> 16; + for (i = 0; i < priv->child_count; i++) { + if (datamap & (1 << i)) + writable[i] = 1; - if (writable[i] && !local->transaction.failed_subvols[i]) - return _gf_true; - } + if (writable[i] && !local->transaction.failed_subvols[i]) + return _gf_true; + } - return _gf_false; + return _gf_false; } int -afr_transaction_fop (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = -1; - unsigned char *failed_subvols = NULL; - int i = 0; - - local = frame->local; - priv = this->private; - - failed_subvols = local->transaction.failed_subvols; - call_count = priv->child_count - AFR_COUNT (failed_subvols, - priv->child_count); - /* Fail if pre-op did not succeed on quorum no. of bricks. */ - if (!afr_changelog_has_quorum (local, this) || !call_count) { - local->op_ret = -1; - /* local->op_errno is already captured in changelog cbk. */ - afr_transaction_resume (frame, this); - return 0; - } +afr_transaction_fop(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + unsigned char *failed_subvols = NULL; + int i = 0; + + local = frame->local; + priv = this->private; + + failed_subvols = local->transaction.failed_subvols; + call_count = priv->child_count - + AFR_COUNT(failed_subvols, priv->child_count); + /* Fail if pre-op did not succeed on quorum no. of bricks. */ + if (!afr_changelog_has_quorum(local, this) || !call_count) { + local->op_ret = -1; + /* local->op_errno is already captured in changelog cbk. 
*/ + afr_transaction_resume(frame, this); + return 0; + } - /* Fail if at least one writeable brick isn't up.*/ - if (local->transaction.type == AFR_DATA_TRANSACTION && - !afr_is_write_subvol_valid (frame, this)) { - local->op_ret = -1; - local->op_errno = EIO; - afr_transaction_resume (frame, this); - return 0; - } + /* Fail if at least one writeable brick isn't up.*/ + if (local->transaction.type == AFR_DATA_TRANSACTION && + !afr_is_write_subvol_valid(frame, this)) { + local->op_ret = -1; + local->op_errno = EIO; + afr_transaction_resume(frame, this); + return 0; + } - local->call_count = call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i] && !failed_subvols[i]) { - local->transaction.wind (frame, this, i); + local->call_count = call_count; + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] && !failed_subvols[i]) { + local->transaction.wind(frame, this, i); - if (!--call_count) - break; - } + if (!--call_count) + break; } + } - return 0; + return 0; } int -afr_transaction_done (call_frame_t *frame, xlator_t *this) +afr_transaction_done(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - gf_boolean_t unwind = _gf_false; - afr_lock_t *lock = NULL; - afr_local_t *lock_local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + gf_boolean_t unwind = _gf_false; + afr_lock_t *lock = NULL; + afr_local_t *lock_local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - if (priv->consistent_metadata) { - LOCK (&frame->lock); - { - unwind = (local->transaction.main_frame != NULL); - } - UNLOCK (&frame->lock); - if (unwind)/*It definitely did post-op*/ - afr_zero_fill_stat (local); + if (priv->consistent_metadata) { + LOCK(&frame->lock); + { + unwind = (local->transaction.main_frame != NULL); } + UNLOCK(&frame->lock); + if (unwind) /*It definitely did post-op*/ + afr_zero_fill_stat(local); + } - if (local->transaction.do_eager_unlock) { - lock = &local->inode_ctx->lock[local->transaction.type]; - LOCK (&local->inode->lock); - { - lock->acquired = _gf_false; - lock->release = _gf_false; - list_splice_init (&lock->frozen, - &lock->waiting); - if (list_empty (&lock->waiting)) - goto unlock; - lock_local = list_entry (lock->waiting.next, - afr_local_t, - transaction.wait_list); - list_del_init (&lock_local->transaction.wait_list); - list_add (&lock_local->transaction.owner_list, - &lock->owners); - } -unlock: - UNLOCK (&local->inode->lock); - } - if (lock_local) { - afr_lock (lock_local->transaction.frame, - lock_local->transaction.frame->this); - } - local->transaction.unwind (frame, this); + if (local->transaction.do_eager_unlock) { + lock = &local->inode_ctx->lock[local->transaction.type]; + LOCK(&local->inode->lock); + { + lock->acquired = _gf_false; + lock->release = _gf_false; + list_splice_init(&lock->frozen, &lock->waiting); + if (list_empty(&lock->waiting)) + goto unlock; + lock_local = list_entry(lock->waiting.next, afr_local_t, + transaction.wait_list); + list_del_init(&lock_local->transaction.wait_list); + list_add(&lock_local->transaction.owner_list, &lock->owners); + } + unlock: + UNLOCK(&local->inode->lock); + } + if (lock_local) { + afr_lock(lock_local->transaction.frame, + lock_local->transaction.frame->this); + } + local->transaction.unwind(frame, this); - AFR_STACK_DESTROY (frame); + AFR_STACK_DESTROY(frame); - return 0; + return 0; } static void -afr_lock_fail_shared (afr_local_t *local, struct list_head 
*list) -{ - afr_local_t *each = NULL; - - while (!list_empty(list)) { - each = list_entry (list->next, afr_local_t, - transaction.wait_list); - list_del_init(&each->transaction.wait_list); - each->op_ret = -1; - each->op_errno = local->op_errno; - afr_transaction_done (each->transaction.frame, - each->transaction.frame->this); - } +afr_lock_fail_shared(afr_local_t *local, struct list_head *list) +{ + afr_local_t *each = NULL; + + while (!list_empty(list)) { + each = list_entry(list->next, afr_local_t, transaction.wait_list); + list_del_init(&each->transaction.wait_list); + each->op_ret = -1; + each->op_errno = local->op_errno; + afr_transaction_done(each->transaction.frame, + each->transaction.frame->this); + } } static void -afr_handle_lock_acquire_failure (afr_local_t *local, gf_boolean_t locked) +afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) { - struct list_head shared; - afr_lock_t *lock = NULL; + struct list_head shared; + afr_lock_t *lock = NULL; - if (!local->transaction.eager_lock_on) - goto out; + if (!local->transaction.eager_lock_on) + goto out; - lock = &local->inode_ctx->lock[local->transaction.type]; + lock = &local->inode_ctx->lock[local->transaction.type]; - INIT_LIST_HEAD (&shared); - LOCK (&local->inode->lock); - { - lock->release = _gf_true; - list_splice_init (&lock->waiting, &shared); - } - UNLOCK (&local->inode->lock); + INIT_LIST_HEAD(&shared); + LOCK(&local->inode->lock); + { + lock->release = _gf_true; + list_splice_init(&lock->waiting, &shared); + } + UNLOCK(&local->inode->lock); - afr_lock_fail_shared (local, &shared); - local->transaction.do_eager_unlock = _gf_true; + afr_lock_fail_shared(local, &shared); + local->transaction.do_eager_unlock = _gf_true; out: - if (locked) { - local->internal_lock.lock_cbk = afr_transaction_done; - afr_unlock (local->transaction.frame, - local->transaction.frame->this); - } else { - afr_transaction_done (local->transaction.frame, - local->transaction.frame->this); - } + if (locked) { + local->internal_lock.lock_cbk = afr_transaction_done; + afr_unlock(local->transaction.frame, local->transaction.frame->this); + } else { + afr_transaction_done(local->transaction.frame, + local->transaction.frame->this); + } } -call_frame_t* -afr_transaction_detach_fop_frame (call_frame_t *frame) +call_frame_t * +afr_transaction_detach_fop_frame(call_frame_t *frame) { - afr_local_t * local = NULL; - call_frame_t *fop_frame = NULL; + afr_local_t *local = NULL; + call_frame_t *fop_frame = NULL; - local = frame->local; + local = frame->local; - afr_handle_inconsistent_fop (frame, &local->op_ret, &local->op_errno); - LOCK (&frame->lock); - { - fop_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); + afr_handle_inconsistent_fop(frame, &local->op_ret, &local->op_errno); + LOCK(&frame->lock); + { + fop_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK(&frame->lock); - return fop_frame; + return fop_frame; } - static void -afr_save_lk_owner (call_frame_t *frame) +afr_save_lk_owner(call_frame_t *frame) { - afr_local_t * local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - local->saved_lk_owner = frame->root->lk_owner; + local->saved_lk_owner = frame->root->lk_owner; } - static void -afr_restore_lk_owner (call_frame_t *frame) +afr_restore_lk_owner(call_frame_t *frame) { - afr_local_t * local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - 
frame->root->lk_owner = local->saved_lk_owner; + frame->root->lk_owner = local->saved_lk_owner; } void -__mark_all_success (call_frame_t *frame, xlator_t *this) +__mark_all_success(call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - local->transaction.failed_subvols[i] = 0; - } + for (i = 0; i < priv->child_count; i++) { + local->transaction.failed_subvols[i] = 0; + } } void -afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_transaction_type type = -1; - dict_t *xdata = NULL; - int **matrix = NULL; - int idx = -1; - int i = 0; - int j = 0; - - priv = this->private; - local = frame->local; - type = local->transaction.type; - idx = afr_index_for_transaction_type (type); - matrix = ALLOC_MATRIX (priv->child_count, int); - - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.changelog_xdata[i]) - continue; - xdata = local->transaction.changelog_xdata[i]; - afr_selfheal_fill_matrix (this, matrix, i, idx, xdata); - } - - memset (local->transaction.pre_op_sources, 1, priv->child_count); - - /*If lock or pre-op failed on a brick, it is not a source. */ - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.failed_subvols[i]) - local->transaction.pre_op_sources[i] = 0; - } - - /* If brick is blamed by others, it is not a source. */ - for (i = 0; i < priv->child_count; i++) - for (j = 0; j < priv->child_count; j++) - if (matrix[i][j] != 0) - local->transaction.pre_op_sources[j] = 0; +afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_transaction_type type = -1; + dict_t *xdata = NULL; + int **matrix = NULL; + int idx = -1; + int i = 0; + int j = 0; + + priv = this->private; + local = frame->local; + type = local->transaction.type; + idx = afr_index_for_transaction_type(type); + matrix = ALLOC_MATRIX(priv->child_count, int); + + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.changelog_xdata[i]) + continue; + xdata = local->transaction.changelog_xdata[i]; + afr_selfheal_fill_matrix(this, matrix, i, idx, xdata); + } + + memset(local->transaction.pre_op_sources, 1, priv->child_count); + + /*If lock or pre-op failed on a brick, it is not a source. */ + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.failed_subvols[i]) + local->transaction.pre_op_sources[i] = 0; + } + + /* If brick is blamed by others, it is not a source. */ + for (i = 0; i < priv->child_count; i++) + for (j = 0; j < priv->child_count; j++) + if (matrix[i][j] != 0) + local->transaction.pre_op_sources[j] = 0; } gf_boolean_t -afr_has_arbiter_fop_cbk_quorum (call_frame_t *frame) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - xlator_t *this = NULL; - gf_boolean_t fop_failed = _gf_false; - unsigned char *pre_op_sources = NULL; - int i = 0; - - local = frame->local; - this = frame->this; - priv = this->private; - pre_op_sources = local->transaction.pre_op_sources; - - /* If the fop failed on the brick, it is not a source. 
*/ - for (i = 0; i < priv->child_count; i++) - if (local->transaction.failed_subvols[i]) - pre_op_sources[i] = 0; - - switch (AFR_COUNT (pre_op_sources, priv->child_count)) { +afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + xlator_t *this = NULL; + gf_boolean_t fop_failed = _gf_false; + unsigned char *pre_op_sources = NULL; + int i = 0; + + local = frame->local; + this = frame->this; + priv = this->private; + pre_op_sources = local->transaction.pre_op_sources; + + /* If the fop failed on the brick, it is not a source. */ + for (i = 0; i < priv->child_count; i++) + if (local->transaction.failed_subvols[i]) + pre_op_sources[i] = 0; + + switch (AFR_COUNT(pre_op_sources, priv->child_count)) { case 1: - if (pre_op_sources[ARBITER_BRICK_INDEX]) - fop_failed = _gf_true; - break; - case 0: + if (pre_op_sources[ARBITER_BRICK_INDEX]) fop_failed = _gf_true; - break; - } + break; + case 0: + fop_failed = _gf_true; + break; + } - if (fop_failed) - return _gf_false; + if (fop_failed) + return _gf_false; - return _gf_true; + return _gf_true; } void -afr_txn_arbitrate_fop (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int pre_op_sources_count = 0; - int i = 0; - - priv = this->private; - local = frame->local; - - afr_compute_pre_op_sources (frame, this); - pre_op_sources_count = AFR_COUNT (local->transaction.pre_op_sources, - priv->child_count); - - /* If arbiter is the only source, do not proceed. */ - if (pre_op_sources_count < 2 && - local->transaction.pre_op_sources[ARBITER_BRICK_INDEX]) { - local->op_ret = -1; - local->op_errno = ENOTCONN; - for (i = 0; i < priv->child_count; i++) - local->transaction.failed_subvols[i] = 1; - } +afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int pre_op_sources_count = 0; + int i = 0; - afr_transaction_fop (frame, this); + priv = this->private; + local = frame->local; - return; + afr_compute_pre_op_sources(frame, this); + pre_op_sources_count = AFR_COUNT(local->transaction.pre_op_sources, + priv->child_count); + + /* If arbiter is the only source, do not proceed. 
*/ + if (pre_op_sources_count < 2 && + local->transaction.pre_op_sources[ARBITER_BRICK_INDEX]) { + local->op_ret = -1; + local->op_errno = ENOTCONN; + for (i = 0; i < priv->child_count; i++) + local->transaction.failed_subvols[i] = 1; + } + + afr_transaction_fop(frame, this); + + return; } int -afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - int ret = 0; - int failure_count = 0; - struct list_head shared; - afr_lock_t *lock = NULL; - - local = frame->local; - priv = this->private; - - INIT_LIST_HEAD (&shared); - if (local->transaction.type == AFR_DATA_TRANSACTION && - !local->transaction.inherited) { - ret = afr_write_subvol_set (frame, this); - if (ret) { - /*act as if operation failed on all subvols*/ - local->op_ret = -1; - local->op_errno = -ret; - for (i = 0; i < priv->child_count; i++) - local->transaction.failed_subvols[i] = 1; - } - } - - if (local->pre_op_compat) - /* old mode, pre-op was done as afr_changelog_do() - just now, before OP */ - afr_changelog_pre_op_update (frame, this); - - if (!local->transaction.eager_lock_on || - local->transaction.inherited) - goto fop; - failure_count = AFR_COUNT (local->transaction.failed_subvols, - priv->child_count); - if (failure_count == priv->child_count) { - afr_handle_lock_acquire_failure (local, _gf_true); - return 0; - } else { - lock = &local->inode_ctx->lock[local->transaction.type]; - LOCK (&local->inode->lock); - { - lock->acquired = _gf_true; - __afr_transaction_wake_shared (local, &shared); - } - UNLOCK (&local->inode->lock); +afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + int ret = 0; + int failure_count = 0; + struct list_head shared; + afr_lock_t *lock = NULL; + + local = frame->local; + priv = this->private; + + INIT_LIST_HEAD(&shared); + if (local->transaction.type == AFR_DATA_TRANSACTION && + !local->transaction.inherited) { + ret = afr_write_subvol_set(frame, this); + if (ret) { + /*act as if operation failed on all subvols*/ + local->op_ret = -1; + local->op_errno = -ret; + for (i = 0; i < priv->child_count; i++) + local->transaction.failed_subvols[i] = 1; + } + } + + if (local->pre_op_compat) + /* old mode, pre-op was done as afr_changelog_do() + just now, before OP */ + afr_changelog_pre_op_update(frame, this); + + if (!local->transaction.eager_lock_on || local->transaction.inherited) + goto fop; + failure_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); + if (failure_count == priv->child_count) { + afr_handle_lock_acquire_failure(local, _gf_true); + return 0; + } else { + lock = &local->inode_ctx->lock[local->transaction.type]; + LOCK(&local->inode->lock); + { + lock->acquired = _gf_true; + __afr_transaction_wake_shared(local, &shared); } + UNLOCK(&local->inode->lock); + } fop: - /* Perform fops with the lk-owner from top xlator. - * Eg: lk-owner of posix-lk and flush should be same, - * flush cant clear the posix-lks without that lk-owner. - */ - afr_save_lk_owner (frame); - frame->root->lk_owner = - local->transaction.main_frame->root->lk_owner; + /* Perform fops with the lk-owner from top xlator. + * Eg: lk-owner of posix-lk and flush should be same, + * flush cant clear the posix-lks without that lk-owner. 
+ */ + afr_save_lk_owner(frame); + frame->root->lk_owner = local->transaction.main_frame->root->lk_owner; - if (priv->arbiter_count == 1) { - afr_txn_arbitrate_fop (frame, this); - } else { - afr_transaction_fop (frame, this); - } + if (priv->arbiter_count == 1) { + afr_txn_arbitrate_fop(frame, this); + } else { + afr_transaction_fop(frame, this); + } - afr_lock_resume_shared (&shared); - return 0; + afr_lock_resume_shared(&shared); + return 0; } int -afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending) +afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int **pending) { - int i = 0; - int ret = 0; - - for (i = 0; i < priv->child_count; i++) { + int i = 0; + int ret = 0; - ret = dict_set_static_bin (xattr, priv->pending_key[i], - pending[i], - AFR_NUM_CHANGE_LOGS * sizeof (int)); - /* 3 = data+metadata+entry */ + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_static_bin(xattr, priv->pending_key[i], pending[i], + AFR_NUM_CHANGE_LOGS * sizeof(int)); + /* 3 = data+metadata+entry */ - if (ret) - break; - } + if (ret) + break; + } - return ret; + return ret; } /* {{{ pending */ int -afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this) +afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - /* Fail the FOP if post-op did not succeed on quorum no. of bricks. */ - if (!afr_changelog_has_quorum (local, this)) { - local->op_ret = -1; - /*local->op_errno is already captured in changelog cbk*/ - } + /* Fail the FOP if post-op did not succeed on quorum no. of bricks. 
*/ + if (!afr_changelog_has_quorum(local, this)) { + local->op_ret = -1; + /*local->op_errno is already captured in changelog cbk*/ + } - if (local->transaction.resume_stub) { - call_resume (local->transaction.resume_stub); - local->transaction.resume_stub = NULL; - } + if (local->transaction.resume_stub) { + call_resume(local->transaction.resume_stub); + local->transaction.resume_stub = NULL; + } - int_lock->lock_cbk = afr_transaction_done; - afr_unlock (frame, this); + int_lock->lock_cbk = afr_transaction_done; + afr_unlock(frame, this); - return 0; + return 0; } - -unsigned char* -afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock) +unsigned char * +afr_locked_nodes_get(afr_transaction_type type, afr_internal_lock_t *int_lock) { - unsigned char *locked_nodes = NULL; - switch (type) { + unsigned char *locked_nodes = NULL; + switch (type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: - locked_nodes = int_lock->locked_nodes; - break; + locked_nodes = int_lock->locked_nodes; + break; case AFR_ENTRY_TRANSACTION: case AFR_ENTRY_RENAME_TRANSACTION: - /*Because same set of subvols participate in all lockee - * entities*/ - locked_nodes = int_lock->lockee[0].locked_nodes; - break; - } - return locked_nodes; + /*Because same set of subvols participate in all lockee + * entities*/ + locked_nodes = int_lock->lockee[0].locked_nodes; + break; + } + return locked_nodes; } - int -afr_changelog_call_count (afr_transaction_type type, - unsigned char *pre_op_subvols, - unsigned char *failed_subvols, - unsigned int child_count) +afr_changelog_call_count(afr_transaction_type type, + unsigned char *pre_op_subvols, + unsigned char *failed_subvols, + unsigned int child_count) { - int i = 0; - int call_count = 0; + int i = 0; + int call_count = 0; - for (i = 0; i < child_count; i++) { - if (pre_op_subvols[i] && !failed_subvols[i]) { - call_count++; - } + for (i = 0; i < child_count; i++) { + if (pre_op_subvols[i] && !failed_subvols[i]) { + call_count++; } + } - if (type == AFR_ENTRY_RENAME_TRANSACTION) - call_count *= 2; + if (type == AFR_ENTRY_RENAME_TRANSACTION) + call_count *= 2; - return call_count; + return call_count; } - gf_boolean_t -afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this) +afr_txn_nothing_failed(call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - if (priv->thin_arbiter_count) { - /* We need to perform post-op even if 1 data brick was down - * before the txn started.*/ - if (AFR_COUNT (local->transaction.failed_subvols, - priv->child_count)) - return _gf_false; - } + if (priv->thin_arbiter_count) { + /* We need to perform post-op even if 1 data brick was down + * before the txn started.*/ + if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count)) + return _gf_false; + } - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i] && - local->transaction.failed_subvols[i]) - return _gf_false; - } + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] && + local->transaction.failed_subvols[i]) + return _gf_false; + } - return _gf_true; + return _gf_true; } void -afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this) +afr_handle_symmetric_errors(call_frame_t *frame, xlator_t *this) { - if (afr_is_symmetric_error (frame, this)) - __mark_all_success (frame, 
this); + if (afr_is_symmetric_error(frame, this)) + __mark_all_success(frame, this); } gf_boolean_t -afr_has_quorum (unsigned char *subvols, xlator_t *this) -{ - unsigned int quorum_count = 0; - afr_private_t *priv = NULL; - unsigned int up_children_count = 0; - - priv = this->private; - up_children_count = AFR_COUNT (subvols, priv->child_count); - - if (priv->quorum_count == AFR_QUORUM_AUTO) { - /* - * Special case for auto-quorum with an even number of nodes. - * - * A replica set with even count N can only handle the same - * number of failures as odd N-1 before losing "vanilla" - * quorum, and the probability of more simultaneous failures is - * actually higher. For example, with a 1% chance of failure - * we'd have a 0.03% chance of two simultaneous failures with - * N=3 but a 0.06% chance with N=4. However, the special case - * is necessary for N=2 because there's no real quorum in that - * case (i.e. can't normally survive *any* failures). In that - * case, we treat the first node as a tie-breaker, allowing - * quorum to be retained in some cases while still honoring the - * all-important constraint that there can not simultaneously - * be two partitioned sets of nodes each believing they have - * quorum. Of two equally sized sets, the one without that - * first node will lose. - * - * It turns out that the special case is beneficial for higher - * values of N as well. Continuing the example above, the - * probability of losing quorum with N=4 and this type of - * quorum is (very) slightly lower than with N=3 and vanilla - * quorum. The difference becomes even more pronounced with - * higher N. Therefore, even though such replica counts are - * unlikely to be seen in practice, we might as well use the - * "special" quorum then as well. - */ - if ((up_children_count * 2) == priv->child_count) { - return subvols[0]; - } - } +afr_has_quorum(unsigned char *subvols, xlator_t *this) +{ + unsigned int quorum_count = 0; + afr_private_t *priv = NULL; + unsigned int up_children_count = 0; - if (priv->quorum_count == AFR_QUORUM_AUTO) { - quorum_count = priv->child_count/2 + 1; - } else { - quorum_count = priv->quorum_count; + priv = this->private; + up_children_count = AFR_COUNT(subvols, priv->child_count); + + if (priv->quorum_count == AFR_QUORUM_AUTO) { + /* + * Special case for auto-quorum with an even number of nodes. + * + * A replica set with even count N can only handle the same + * number of failures as odd N-1 before losing "vanilla" + * quorum, and the probability of more simultaneous failures is + * actually higher. For example, with a 1% chance of failure + * we'd have a 0.03% chance of two simultaneous failures with + * N=3 but a 0.06% chance with N=4. However, the special case + * is necessary for N=2 because there's no real quorum in that + * case (i.e. can't normally survive *any* failures). In that + * case, we treat the first node as a tie-breaker, allowing + * quorum to be retained in some cases while still honoring the + * all-important constraint that there can not simultaneously + * be two partitioned sets of nodes each believing they have + * quorum. Of two equally sized sets, the one without that + * first node will lose. + * + * It turns out that the special case is beneficial for higher + * values of N as well. Continuing the example above, the + * probability of losing quorum with N=4 and this type of + * quorum is (very) slightly lower than with N=3 and vanilla + * quorum. The difference becomes even more pronounced with + * higher N. 
Therefore, even though such replica counts are + * unlikely to be seen in practice, we might as well use the + * "special" quorum then as well. + */ + if ((up_children_count * 2) == priv->child_count) { + return subvols[0]; } + } - if (up_children_count >= quorum_count) - return _gf_true; + if (priv->quorum_count == AFR_QUORUM_AUTO) { + quorum_count = priv->child_count / 2 + 1; + } else { + quorum_count = priv->quorum_count; + } - return _gf_false; + if (up_children_count >= quorum_count) + return _gf_true; + + return _gf_false; } static gf_boolean_t -afr_has_fop_quorum (call_frame_t *frame) +afr_has_fop_quorum(call_frame_t *frame) { - xlator_t *this = frame->this; - afr_local_t *local = frame->local; - unsigned char *locked_nodes = NULL; + xlator_t *this = frame->this; + afr_local_t *local = frame->local; + unsigned char *locked_nodes = NULL; - locked_nodes = afr_locked_nodes_get (local->transaction.type, - &local->internal_lock); - return afr_has_quorum (locked_nodes, this); + locked_nodes = afr_locked_nodes_get(local->transaction.type, + &local->internal_lock); + return afr_has_quorum(locked_nodes, this); } static gf_boolean_t -afr_has_fop_cbk_quorum (call_frame_t *frame) +afr_has_fop_cbk_quorum(call_frame_t *frame) { - afr_local_t *local = frame->local; - xlator_t *this = frame->this; - afr_private_t *priv = this->private; - unsigned char *success = alloca0(priv->child_count); - int i = 0; + afr_local_t *local = frame->local; + xlator_t *this = frame->this; + afr_private_t *priv = this->private; + unsigned char *success = alloca0(priv->child_count); + int i = 0; - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) - if (!local->transaction.failed_subvols[i]) - success[i] = 1; - } + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) + if (!local->transaction.failed_subvols[i]) + success[i] = 1; + } - return afr_has_quorum (success, this); + return afr_has_quorum(success, this); } gf_boolean_t -afr_need_dirty_marking (call_frame_t *frame, xlator_t *this) +afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = this->private; - afr_local_t *local = NULL; - gf_boolean_t need_dirty = _gf_false; + afr_private_t *priv = this->private; + afr_local_t *local = NULL; + gf_boolean_t need_dirty = _gf_false; - local = frame->local; + local = frame->local; - if (!priv->quorum_count || !local->optimistic_change_log) - return _gf_false; + if (!priv->quorum_count || !local->optimistic_change_log) + return _gf_false; - if (local->transaction.type == AFR_DATA_TRANSACTION || - local->transaction.type == AFR_METADATA_TRANSACTION) - return _gf_false; + if (local->transaction.type == AFR_DATA_TRANSACTION || + local->transaction.type == AFR_METADATA_TRANSACTION) + return _gf_false; - if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) == - priv->child_count) - return _gf_false; + if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) == + priv->child_count) + return _gf_false; - if (priv->arbiter_count) { - if (!afr_has_arbiter_fop_cbk_quorum (frame)) - need_dirty = _gf_true; - } else if (!afr_has_fop_cbk_quorum (frame)) { - need_dirty = _gf_true; - } + if (priv->arbiter_count) { + if (!afr_has_arbiter_fop_cbk_quorum(frame)) + need_dirty = _gf_true; + } else if (!afr_has_fop_cbk_quorum(frame)) { + need_dirty = _gf_true; + } - return need_dirty; + return need_dirty; } void -afr_handle_quorum (call_frame_t *frame, xlator_t *this) +afr_handle_quorum(call_frame_t *frame, xlator_t *this) { - afr_local_t 
*local = NULL; - afr_private_t *priv = NULL; - int i = 0; - const char *file = NULL; - uuid_t gfid = {0}; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + const char *file = NULL; + uuid_t gfid = {0}; - local = frame->local; - priv = frame->this->private; + local = frame->local; + priv = frame->this->private; - if (priv->quorum_count == 0) - return; - - /* If the fop already failed return right away to preserve errno */ - if (local->op_ret == -1) - return; + if (priv->quorum_count == 0) + return; - /* - * Network split may happen just after the fops are unwound, so check - * if the fop succeeded in a way it still follows quorum. If it doesn't, - * mark the fop as failure, mark the changelogs so it reflects that - * failure. - * - * Scenario: - * There are 3 mounts on 3 machines(node1, node2, node3) all writing to - * single file. Network split happened in a way that node1 can't see - * node2, node3. Node2, node3 both of them can't see node1. Now at the - * time of sending write all the bricks are up. Just after write fop is - * wound on node1, network split happens. Node1 thinks write fop failed - * on node2, node3 so marks pending changelog for those 2 extended - * attributes on node1. Node2, node3 thinks writes failed on node1 so - * they mark pending changelog for node1. When the network is stable - * again the file already is in split-brain. These checks prevent - * marking pending changelog on other subvolumes if the fop doesn't - * succeed in a way it is still following quorum. So with this fix what - * is happening is, node1 will have all pending changelog(FOOL) because - * the write succeeded only on node1 but failed on node2, node3 so - * instead of marking pending changelogs on node2, node3 it just treats - * the fop as failure and goes into DIRTY state. Where as node2, node3 - * say they are sources and have pending changelog to node1 so there is - * no split-brain with the fix. The problem is eliminated completely. - */ + /* If the fop already failed return right away to preserve errno */ + if (local->op_ret == -1) + return; - if (priv->arbiter_count) { - if (afr_has_arbiter_fop_cbk_quorum (frame)) - return; - } else if (afr_has_fop_cbk_quorum (frame)) { - return; - } + /* + * Network split may happen just after the fops are unwound, so check + * if the fop succeeded in a way it still follows quorum. If it doesn't, + * mark the fop as failure, mark the changelogs so it reflects that + * failure. + * + * Scenario: + * There are 3 mounts on 3 machines(node1, node2, node3) all writing to + * single file. Network split happened in a way that node1 can't see + * node2, node3. Node2, node3 both of them can't see node1. Now at the + * time of sending write all the bricks are up. Just after write fop is + * wound on node1, network split happens. Node1 thinks write fop failed + * on node2, node3 so marks pending changelog for those 2 extended + * attributes on node1. Node2, node3 thinks writes failed on node1 so + * they mark pending changelog for node1. When the network is stable + * again the file already is in split-brain. These checks prevent + * marking pending changelog on other subvolumes if the fop doesn't + * succeed in a way it is still following quorum. So with this fix what + * is happening is, node1 will have all pending changelog(FOOL) because + * the write succeeded only on node1 but failed on node2, node3 so + * instead of marking pending changelogs on node2, node3 it just treats + * the fop as failure and goes into DIRTY state. 
Where as node2, node3 + * say they are sources and have pending changelog to node1 so there is + * no split-brain with the fix. The problem is eliminated completely. + */ + + if (priv->arbiter_count) { + if (afr_has_arbiter_fop_cbk_quorum(frame)) + return; + } else if (afr_has_fop_cbk_quorum(frame)) { + return; + } - if (afr_need_dirty_marking (frame, this)) - goto set_response; + if (afr_need_dirty_marking(frame, this)) + goto set_response; - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i]) - afr_transaction_fop_failed (frame, frame->this, i); - } + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) + afr_transaction_fop_failed(frame, frame->this, i); + } set_response: - local->op_ret = -1; - local->op_errno = afr_final_errno (local, priv); - if (local->op_errno == 0) - local->op_errno = afr_quorum_errno (priv); - - if (local->fd) { - gf_uuid_copy (gfid, local->fd->inode->gfid); - file = uuid_utoa (gfid); - } else { - loc_path (&local->loc, local->loc.name); - file = local->loc.path; - } - - gf_msg (frame->this->name, GF_LOG_WARNING, local->op_errno, - AFR_MSG_QUORUM_FAIL, "%s: Failing %s as quorum is not met", - file, gf_fop_list[local->op]); - - switch (local->transaction.type) { + local->op_ret = -1; + local->op_errno = afr_final_errno(local, priv); + if (local->op_errno == 0) + local->op_errno = afr_quorum_errno(priv); + + if (local->fd) { + gf_uuid_copy(gfid, local->fd->inode->gfid); + file = uuid_utoa(gfid); + } else { + loc_path(&local->loc, local->loc.name); + file = local->loc.path; + } + + gf_msg(frame->this->name, GF_LOG_WARNING, local->op_errno, + AFR_MSG_QUORUM_FAIL, "%s: Failing %s as quorum is not met", file, + gf_fop_list[local->op]); + + switch (local->transaction.type) { case AFR_ENTRY_TRANSACTION: case AFR_ENTRY_RENAME_TRANSACTION: - afr_pick_error_xdata (local, priv, local->parent, - local->readable, local->parent2, - local->readable2); - break; + afr_pick_error_xdata(local, priv, local->parent, local->readable, + local->parent2, local->readable2); + break; default: - afr_pick_error_xdata (local, priv, local->inode, - local->readable, NULL, NULL); - break; - } + afr_pick_error_xdata(local, priv, local->inode, local->readable, + NULL, NULL); + break; + } } int -afr_fill_ta_loc (xlator_t *this, loc_t *loc) -{ - afr_private_t *priv = NULL; - - priv = this->private; - loc->parent = inode_ref (priv->root_inode); - gf_uuid_copy (loc->pargfid, loc->parent->gfid); - loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; - gf_uuid_copy (loc->gfid, priv->ta_gfid); - loc->inode = inode_new (loc->parent->table); - if (!loc->inode) { - loc_wipe(loc); - return -ENOMEM; - } - return 0; +afr_fill_ta_loc(xlator_t *this, loc_t *loc) +{ + afr_private_t *priv = NULL; + + priv = this->private; + loc->parent = inode_ref(priv->root_inode); + gf_uuid_copy(loc->pargfid, loc->parent->gfid); + loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; + gf_uuid_copy(loc->gfid, priv->ta_gfid); + loc->inode = inode_new(loc->parent->table); + if (!loc->inode) { + loc_wipe(loc); + return -ENOMEM; + } + return 0; } int -afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local) -{ - int ret = 0; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - int failed_count = 0; - struct gf_flock flock = {0, }; - loc_t loc = {0,}; - int i = 0; +afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local) +{ + int ret = 0; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + int failed_count = 0; + struct gf_flock flock = { + 
0, + }; + loc_t loc = { + 0, + }; + int i = 0; + + priv = this->private; + if (!priv->thin_arbiter_count) + return 0; - priv = this->private; - if (!priv->thin_arbiter_count) - return 0; + failed_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); + if (!failed_count) + return 0; + GF_ASSERT(failed_count == 1); + ret = afr_fill_ta_loc(this, &loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to populate thin-arbiter loc for: %s.", loc.name); + goto out; + } + + xattr = dict_new(); + if (!xattr) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_static_bin(xattr, priv->pending_key[i], + local->pending[i], + AFR_NUM_CHANGE_LOGS * sizeof(int)); + if (ret) + goto out; + } + + flock.l_type = F_WRLCK; + flock.l_start = 0; + flock.l_len = 0; + + /*TODO: Convert to two domain locking. */ + ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], + AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL); + if (ret) + goto out; + + ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, + GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL); + + if (ret == -EINVAL) { + gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB, + "Thin-arbiter has denied post-op on %s for gfid %s.", + priv->pending_key[THIN_ARBITER_BRICK_INDEX], + uuid_utoa(local->inode->gfid)); + + } else if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Post-op on thin-arbiter id file %s failed for gfid %s.", + priv->pending_key[THIN_ARBITER_BRICK_INDEX], + uuid_utoa(local->inode->gfid)); + } + flock.l_type = F_UNLCK; + syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY, + &loc, F_SETLK, &flock, NULL, NULL); +out: + if (xattr) + dict_unref(xattr); - failed_count = AFR_COUNT (local->transaction.failed_subvols, - priv->child_count); - if (!failed_count) - return 0; + return ret; +} - GF_ASSERT (failed_count == 1); - ret = afr_fill_ta_loc (this, &loc); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to populate thin-arbiter loc for: %s.", - loc.name); - goto out; - } +int +afr_changelog_post_op_now(call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = this->private; + afr_local_t *local = NULL; + dict_t *xattr = NULL; + int i = 0; + int ret = 0; + int idx = 0; + int nothing_failed = 1; + gf_boolean_t need_undirty = _gf_false; + + afr_handle_quorum(frame, this); + local = frame->local; + idx = afr_index_for_transaction_type(local->transaction.type); + + xattr = dict_new(); + if (!xattr) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done(frame, this); + goto out; + } - xattr = dict_new (); - if (!xattr) { - ret = -ENOMEM; - goto out; - } - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_static_bin (xattr, priv->pending_key[i], - local->pending[i], - AFR_NUM_CHANGE_LOGS * sizeof (int)); - if (ret) - goto out; + nothing_failed = afr_txn_nothing_failed(frame, this); + + if (afr_changelog_pre_op_uninherit(frame, this)) + need_undirty = _gf_false; + else + need_undirty = _gf_true; + + if (local->op_ret < 0 && !nothing_failed) { + if (afr_need_dirty_marking(frame, this)) { + local->dirty[idx] = hton32(1); + goto set_dirty; } - flock.l_type = F_WRLCK; - flock.l_start = 0; - flock.l_len = 0; + afr_changelog_post_op_done(frame, this); + goto out; + } - /*TODO: Convert to two domain locking. 
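
The thin-arbiter post-op in this hunk packs each brick's pending changelog, AFR_NUM_CHANGE_LOGS 32-bit counters, into a binary xattr value, with the counters kept in network byte order (hton32 elsewhere in this file). A standalone sketch of that layout, assuming the three slots (data, metadata, entry) that the transaction types suggest; encode_pending and NUM_CHANGE_LOGS are illustrative names, not the real dict/xattrop API.

/* Standalone sketch, not AFR code: lay out a per-brick changelog array as a
 * binary value of fixed-width big-endian counters. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NUM_CHANGE_LOGS 3 /* data, metadata, entry (assumed here) */

static void
encode_pending(const int32_t *pending, unsigned char *buf)
{
    int i;

    for (i = 0; i < NUM_CHANGE_LOGS; i++) {
        uint32_t be = htonl((uint32_t)pending[i]);
        memcpy(buf + i * sizeof(be), &be, sizeof(be));
    }
}

int
main(void)
{
    int32_t pending[NUM_CHANGE_LOGS] = {1, 0, 0}; /* one pending data op */
    unsigned char value[NUM_CHANGE_LOGS * sizeof(int32_t)];

    encode_pending(pending, value);
    /* in network byte order the low-order byte of the first counter sits
     * at offset 3 */
    printf("value[3] = %u\n", (unsigned)value[3]);
    return 0;
}
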
*/ - ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, - NULL, NULL); - if (ret) - goto out; + if (nothing_failed && !need_undirty) { + afr_changelog_post_op_done(frame, this); + goto out; + } + + if (local->transaction.in_flight_sb) { + local->op_ret = -1; + local->op_errno = local->transaction.in_flight_sb_errno; + afr_changelog_post_op_done(frame, this); + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.failed_subvols[i]) + local->pending[i][idx] = hton32(1); + } + + ret = afr_set_pending_dict(priv, xattr, local->pending); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done(frame, this); + goto out; + } - ret = syncop_xattrop (priv->children[THIN_ARBITER_BRICK_INDEX], &loc, - GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL); + ret = afr_changelog_thin_arbiter_post_op(this, local); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + afr_changelog_post_op_done(frame, this); + goto out; + } - if (ret == -EINVAL) { - gf_msg (this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB, - "Thin-arbiter has denied post-op on %s for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa (local->inode->gfid)); + if (need_undirty) + local->dirty[idx] = hton32(-1); + else + local->dirty[idx] = hton32(0); - } else if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Post-op on thin-arbiter id file %s failed for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa (local->inode->gfid)); - } - flock.l_type = F_UNLCK; - syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL); +set_dirty: + ret = dict_set_static_bin(xattr, AFR_DIRTY, local->dirty, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done(frame, this); + goto out; + } + + afr_changelog_do(frame, this, xattr, afr_changelog_post_op_done, + AFR_TRANSACTION_POST_OP); out: - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); - return ret; + return 0; } -int -afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) -{ - afr_private_t *priv = this->private; - afr_local_t *local = NULL; - dict_t *xattr = NULL; - int i = 0; - int ret = 0; - int idx = 0; - int nothing_failed = 1; - gf_boolean_t need_undirty = _gf_false; - - afr_handle_quorum (frame, this); - local = frame->local; - idx = afr_index_for_transaction_type (local->transaction.type); - - xattr = dict_new (); - if (!xattr) { - local->op_ret = -1; - local->op_errno = ENOMEM; - afr_changelog_post_op_done (frame, this); - goto out; - } +gf_boolean_t +afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_inode_ctx_t *ctx = NULL; + int i = 0; + gf_boolean_t ret = _gf_false; + int type = 0; - nothing_failed = afr_txn_nothing_failed (frame, this); + local = frame->local; + priv = this->private; + ctx = local->inode_ctx; - if (afr_changelog_pre_op_uninherit (frame, this)) - need_undirty = _gf_false; - else - need_undirty = _gf_true; + type = afr_index_for_transaction_type(local->transaction.type); + if (type != AFR_DATA_TRANSACTION) + return !local->transaction.dirtied; - if (local->op_ret < 0 && !nothing_failed) { - if (afr_need_dirty_marking (frame, this)) { - local->dirty[idx] = hton32(1); - goto set_dirty; - } + if (local->transaction.no_uninherit) + return _gf_false; - 
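
afr_changelog_post_op_now, reformatted in this hunk, chooses between skipping the xattrop, marking the inode dirty when the fop failed without quorum, and clearing the dirty flag while recording pending counts for the failed bricks. A standalone sketch of just that decision, with illustrative names; the in-flight split-brain branch is left out.

/* Standalone sketch, not AFR code: the three post-op outcomes visible in
 * the surrounding function. POST_OP_UNDIRTY also implies recording pending
 * counts for any failed brick. */
#include <stdio.h>

enum post_op_action { POST_OP_SKIP, POST_OP_MARK_DIRTY, POST_OP_UNDIRTY };

static enum post_op_action
decide_post_op(int op_failed, int nothing_failed, int need_undirty,
               int need_dirty_marking)
{
    if (op_failed && !nothing_failed)
        return need_dirty_marking ? POST_OP_MARK_DIRTY : POST_OP_SKIP;

    if (nothing_failed && !need_undirty)
        return POST_OP_SKIP;

    return POST_OP_UNDIRTY;
}

int
main(void)
{
    /* a write that succeeded everywhere after dirtying the inode in pre-op */
    printf("%d\n", decide_post_op(0, 1, 1, 0)); /* POST_OP_UNDIRTY */
    return 0;
}
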
afr_changelog_post_op_done (frame, this); - goto out; - } + /* This function must be idempotent. So check if we + were called before and return the same answer again. - if (nothing_failed && !need_undirty) { - afr_changelog_post_op_done (frame, this); - goto out; - } + It is important to keep this function idempotent for + the call in afr_changelog_post_op_safe() to not have + side effects on the call from afr_changelog_post_op_now() + */ + if (local->transaction.uninherit_done) + return local->transaction.uninherit_value; - if (local->transaction.in_flight_sb) { - local->op_ret = -1; - local->op_errno = local->transaction.in_flight_sb_errno; - afr_changelog_post_op_done (frame, this); - goto out; + LOCK(&local->inode->lock); + { + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] != ctx->pre_op_done[type][i]) { + ret = !local->transaction.dirtied; + goto unlock; + } + } + + if (ctx->inherited[type]) { + ret = _gf_true; + ctx->inherited[type]--; + } else if (ctx->on_disk[type]) { + ret = _gf_false; + ctx->on_disk[type]--; + } else { + /* ASSERT */ + ret = _gf_false; } - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.failed_subvols[i]) - local->pending[i][idx] = hton32(1); - } - - ret = afr_set_pending_dict (priv, xattr, local->pending); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = ENOMEM; - afr_changelog_post_op_done (frame, this); - goto out; - } - - ret = afr_changelog_thin_arbiter_post_op (this, local); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - afr_changelog_post_op_done (frame, this); - goto out; + if (!ctx->inherited[type] && !ctx->on_disk[type]) { + for (i = 0; i < priv->child_count; i++) + ctx->pre_op_done[type][i] = 0; } - - if (need_undirty) - local->dirty[idx] = hton32(-1); - else - local->dirty[idx] = hton32(0); - -set_dirty: - ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty, - sizeof(int) * AFR_NUM_CHANGE_LOGS); - if (ret) { - local->op_ret = -1; - local->op_errno = ENOMEM; - afr_changelog_post_op_done (frame, this); - goto out; - } - - afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done, - AFR_TRANSACTION_POST_OP); -out: - if (xattr) - dict_unref (xattr); - - return 0; -} - - -gf_boolean_t -afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_inode_ctx_t *ctx = NULL; - int i = 0; - gf_boolean_t ret = _gf_false; - int type = 0; - - local = frame->local; - priv = this->private; - ctx = local->inode_ctx; - - type = afr_index_for_transaction_type (local->transaction.type); - if (type != AFR_DATA_TRANSACTION) - return !local->transaction.dirtied; - - if (local->transaction.no_uninherit) - return _gf_false; - - /* This function must be idempotent. So check if we - were called before and return the same answer again. 
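
The comment above requires afr_changelog_pre_op_uninherit to be idempotent so that both the safe and the immediate post-op paths can call it. A standalone sketch of the compute-once, remember-the-answer pattern it relies on; uninherit_once and struct txn_state are illustrative names, not AFR structures.

/* Standalone sketch, not AFR code: compute an answer once, cache it, and
 * return the cached value on every later call. */
#include <stdbool.h>
#include <stdio.h>

struct txn_state {
    bool uninherit_done;  /* has the answer been computed already? */
    bool uninherit_value; /* the remembered answer */
};

static bool
uninherit_once(struct txn_state *txn, bool (*compute)(void))
{
    if (txn->uninherit_done)
        return txn->uninherit_value; /* same answer as the first call */

    txn->uninherit_value = compute();
    txn->uninherit_done = true;
    return txn->uninherit_value;
}

static bool
compute_answer(void)
{
    return true;
}

int
main(void)
{
    struct txn_state txn = {0};

    printf("%d %d\n", uninherit_once(&txn, compute_answer),
           uninherit_once(&txn, compute_answer)); /* prints "1 1" */
    return 0;
}
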
- - It is important to keep this function idempotent for - the call in afr_changelog_post_op_safe() to not have - side effects on the call from afr_changelog_post_op_now() - */ - if (local->transaction.uninherit_done) - return local->transaction.uninherit_value; - - LOCK(&local->inode->lock); - { - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i] != - ctx->pre_op_done[type][i]) { - ret = !local->transaction.dirtied; - goto unlock; - } - } - - if (ctx->inherited[type]) { - ret = _gf_true; - ctx->inherited[type]--; - } else if (ctx->on_disk[type]) { - ret = _gf_false; - ctx->on_disk[type]--; - } else { - /* ASSERT */ - ret = _gf_false; - } - - if (!ctx->inherited[type] && !ctx->on_disk[type]) { - for (i = 0; i < priv->child_count; i++) - ctx->pre_op_done[type][i] = 0; - } - } + } unlock: - UNLOCK(&local->inode->lock); + UNLOCK(&local->inode->lock); - local->transaction.uninherit_done = _gf_true; - local->transaction.uninherit_value = ret; + local->transaction.uninherit_done = _gf_true; + local->transaction.uninherit_value = ret; - return ret; + return ret; } - gf_boolean_t -afr_changelog_pre_op_inherit (call_frame_t *frame, xlator_t *this) +afr_changelog_pre_op_inherit(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - gf_boolean_t ret = _gf_false; - int type = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + gf_boolean_t ret = _gf_false; + int type = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - if (local->transaction.type != AFR_DATA_TRANSACTION) - return _gf_false; + if (local->transaction.type != AFR_DATA_TRANSACTION) + return _gf_false; - type = afr_index_for_transaction_type (local->transaction.type); + type = afr_index_for_transaction_type(local->transaction.type); - LOCK(&local->inode->lock); - { - if (!local->inode_ctx->on_disk[type]) { - /* nothing to inherit yet */ - ret = _gf_false; - goto unlock; - } + LOCK(&local->inode->lock); + { + if (!local->inode_ctx->on_disk[type]) { + /* nothing to inherit yet */ + ret = _gf_false; + goto unlock; + } - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.pre_op[i] != - local->inode_ctx->pre_op_done[type][i]) { - /* either inherit exactly, or don't */ - ret = _gf_false; - goto unlock; - } - } + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] != + local->inode_ctx->pre_op_done[type][i]) { + /* either inherit exactly, or don't */ + ret = _gf_false; + goto unlock; + } + } - local->inode_ctx->inherited[type]++; + local->inode_ctx->inherited[type]++; - ret = _gf_true; + ret = _gf_true; - local->transaction.inherited = _gf_true; - } + local->transaction.inherited = _gf_true; + } unlock: - UNLOCK(&local->inode->lock); + UNLOCK(&local->inode->lock); - return ret; + return ret; } - gf_boolean_t -afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - gf_boolean_t ret = _gf_false; - int type = 0; - - local = frame->local; - priv = this->private; - - if (local->transaction.type == AFR_ENTRY_TRANSACTION || - local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) - return _gf_false; - - if (local->transaction.inherited) - /* was already inherited in afr_changelog_pre_op */ - return _gf_false; - - if (!local->transaction.dirtied) - return _gf_false; - - if (!afr_txn_nothing_failed (frame, this)) - return _gf_false; - - type = 
afr_index_for_transaction_type (local->transaction.type); - - ret = _gf_false; - - LOCK(&local->inode->lock); - { - if (!local->inode_ctx->on_disk[type]) { - for (i = 0; i < priv->child_count; i++) - local->inode_ctx->pre_op_done[type][i] = - (!local->transaction.failed_subvols[i]); - } else { - for (i = 0; i < priv->child_count; i++) - if (local->inode_ctx->pre_op_done[type][i] != - (!local->transaction.failed_subvols[i])) { - local->transaction.no_uninherit = 1; - goto unlock; - } - } - local->inode_ctx->on_disk[type]++; - - ret = _gf_true; - } -unlock: - UNLOCK(&local->inode->lock); +afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; + gf_boolean_t ret = _gf_false; + int type = 0; - return ret; -} + local = frame->local; + priv = this->private; + if (local->transaction.type == AFR_ENTRY_TRANSACTION || + local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) + return _gf_false; -int -afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) -{ - afr_local_t *local = NULL; - int call_count = -1; - int child_index = -1; + if (local->transaction.inherited) + /* was already inherited in afr_changelog_pre_op */ + return _gf_false; - local = frame->local; - child_index = (long) cookie; + if (!local->transaction.dirtied) + return _gf_false; - if (op_ret == -1) { - local->op_errno = op_errno; - afr_transaction_fop_failed (frame, this, child_index); - } + if (!afr_txn_nothing_failed(frame, this)) + return _gf_false; - if (xattr) - local->transaction.changelog_xdata[child_index] = dict_ref (xattr); + type = afr_index_for_transaction_type(local->transaction.type); - call_count = afr_frame_return (frame); + ret = _gf_false; - if (call_count == 0) { - local->transaction.changelog_resume (frame, this); + LOCK(&local->inode->lock); + { + if (!local->inode_ctx->on_disk[type]) { + for (i = 0; i < priv->child_count; i++) + local->inode_ctx->pre_op_done[type][i] = + (!local->transaction.failed_subvols[i]); + } else { + for (i = 0; i < priv->child_count; i++) + if (local->inode_ctx->pre_op_done[type][i] != + (!local->transaction.failed_subvols[i])) { + local->transaction.no_uninherit = 1; + goto unlock; + } } + local->inode_ctx->on_disk[type]++; - return 0; + ret = _gf_true; + } +unlock: + UNLOCK(&local->inode->lock); + + return ret; } -void -afr_changelog_populate_xdata (call_frame_t *frame, afr_xattrop_type_t op, - dict_t **xdata, dict_t **newloc_xdata) -{ - int i = 0; - int ret = 0; - char *key = NULL; - const char *name = NULL; - dict_t *xdata1 = NULL; - dict_t *xdata2 = NULL; - xlator_t *this = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - gf_boolean_t need_entry_key_set = _gf_true; - - local = frame->local; - this = THIS; - priv = this->private; - - if (local->transaction.type == AFR_DATA_TRANSACTION || - local->transaction.type == AFR_METADATA_TRANSACTION) - goto out; +int +afr_changelog_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xattr, dict_t *xdata) +{ + afr_local_t *local = NULL; + int call_count = -1; + int child_index = -1; - if (!priv->esh_granular) - goto out; + local = frame->local; + child_index = (long)cookie; - xdata1 = dict_new(); - if (!xdata1) - goto out; + if (op_ret == -1) { + local->op_errno = op_errno; + afr_transaction_fop_failed(frame, this, child_index); + } + + if (xattr) + local->transaction.changelog_xdata[child_index] = dict_ref(xattr); - name = 
local->loc.name; - if (local->op == GF_FOP_LINK) - name = local->newloc.name; + call_count = afr_frame_return(frame); - switch (op) { + if (call_count == 0) { + local->transaction.changelog_resume(frame, this); + } + + return 0; +} + +void +afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op, + dict_t **xdata, dict_t **newloc_xdata) +{ + int i = 0; + int ret = 0; + char *key = NULL; + const char *name = NULL; + dict_t *xdata1 = NULL; + dict_t *xdata2 = NULL; + xlator_t *this = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + gf_boolean_t need_entry_key_set = _gf_true; + + local = frame->local; + this = THIS; + priv = this->private; + + if (local->transaction.type == AFR_DATA_TRANSACTION || + local->transaction.type == AFR_METADATA_TRANSACTION) + goto out; + + if (!priv->esh_granular) + goto out; + + xdata1 = dict_new(); + if (!xdata1) + goto out; + + name = local->loc.name; + if (local->op == GF_FOP_LINK) + name = local->newloc.name; + + switch (op) { case AFR_TRANSACTION_PRE_OP: - key = GF_XATTROP_ENTRY_IN_KEY; - break; + key = GF_XATTROP_ENTRY_IN_KEY; + break; case AFR_TRANSACTION_POST_OP: - if (afr_txn_nothing_failed (frame, this)) { - key = GF_XATTROP_ENTRY_OUT_KEY; - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.failed_subvols[i]) - continue; - need_entry_key_set = _gf_false; - break; - } - /* If the transaction itself did not fail and there - * are no failed subvolumes, check whether the fop - * failed due to a symmetric error. If it did, do - * not set the ENTRY_OUT xattr which would end up - * deleting a name index which was created possibly by - * an earlier entry txn that may have failed on some - * of the sub-volumes. - */ - if (local->op_ret) - need_entry_key_set = _gf_false; - } else { - key = GF_XATTROP_ENTRY_IN_KEY; + if (afr_txn_nothing_failed(frame, this)) { + key = GF_XATTROP_ENTRY_OUT_KEY; + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.failed_subvols[i]) + continue; + need_entry_key_set = _gf_false; + break; } - break; - } + /* If the transaction itself did not fail and there + * are no failed subvolumes, check whether the fop + * failed due to a symmetric error. If it did, do + * not set the ENTRY_OUT xattr which would end up + * deleting a name index which was created possibly by + * an earlier entry txn that may have failed on some + * of the sub-volumes. 
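
afr_changelog_populate_xdata, continued below, decides whether the entry name is sent under GF_XATTROP_ENTRY_IN_KEY or GF_XATTROP_ENTRY_OUT_KEY: the OUT key, which deletes the granular name index, is only used in post-op when neither the transaction nor the fop itself failed; a symmetric fop failure sets no key at all. A standalone sketch of that choice using placeholder key strings rather than the real keys, and a simplified view of the failure inputs.

/* Standalone sketch, not AFR code: pick which (placeholder) entry key to
 * send with the xattrop, or none. */
#include <stdio.h>

static const char *
pick_entry_key(int is_post_op, int txn_failed, int fop_op_ret)
{
    if (!is_post_op)
        return "entry-in"; /* pre-op: create the name index */

    if (txn_failed)
        return "entry-in"; /* keep the index for entry self-heal */

    if (fop_op_ret != 0)
        return NULL; /* symmetric failure: leave any existing index alone */

    return "entry-out"; /* safe to delete the name index */
}

int
main(void)
{
    const char *key = pick_entry_key(1, 0, 0);
    printf("%s\n", key ? key : "(none)"); /* entry-out */

    key = pick_entry_key(1, 0, -1); /* fop failed symmetrically */
    printf("%s\n", key ? key : "(none)"); /* (none) */
    return 0;
}
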
+ */ + if (local->op_ret) + need_entry_key_set = _gf_false; + } else { + key = GF_XATTROP_ENTRY_IN_KEY; + } + break; + } - if (need_entry_key_set) { - ret = dict_set_str (xdata1, key, (char *)name); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, - "%s/%s: Could not set %s key during xattrop", - uuid_utoa (local->loc.pargfid), local->loc.name, - key); - if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { - xdata2 = dict_new (); - if (!xdata2) - goto out; - - ret = dict_set_str (xdata2, key, - (char *)local->newloc.name); - if (ret) - gf_msg (THIS->name, GF_LOG_ERROR, 0, - AFR_MSG_DICT_SET_FAILED, - "%s/%s: Could not set %s key during " - "xattrop", - uuid_utoa (local->newloc.pargfid), - local->newloc.name, key); - } + if (need_entry_key_set) { + ret = dict_set_str(xdata1, key, (char *)name); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, + "%s/%s: Could not set %s key during xattrop", + uuid_utoa(local->loc.pargfid), local->loc.name, key); + if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + xdata2 = dict_new(); + if (!xdata2) + goto out; + + ret = dict_set_str(xdata2, key, (char *)local->newloc.name); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, + "%s/%s: Could not set %s key during " + "xattrop", + uuid_utoa(local->newloc.pargfid), local->newloc.name, + key); } + } - *xdata = xdata1; - *newloc_xdata = xdata2; - xdata1 = xdata2 = NULL; + *xdata = xdata1; + *newloc_xdata = xdata2; + xdata1 = xdata2 = NULL; out: - if (xdata1) - dict_unref (xdata1); - return; + if (xdata1) + dict_unref(xdata1); + return; } int -afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count, - afr_changelog_resume_t changelog_resume, - afr_xattrop_type_t op, dict_t **xdata, - dict_t **newloc_xdata) +afr_changelog_prepare(xlator_t *this, call_frame_t *frame, int *call_count, + afr_changelog_resume_t changelog_resume, + afr_xattrop_type_t op, dict_t **xdata, + dict_t **newloc_xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - *call_count = afr_changelog_call_count (local->transaction.type, - local->transaction.pre_op, - local->transaction.failed_subvols, - priv->child_count); + *call_count = afr_changelog_call_count( + local->transaction.type, local->transaction.pre_op, + local->transaction.failed_subvols, priv->child_count); - if (*call_count == 0) { - changelog_resume (frame, this); - return -1; - } + if (*call_count == 0) { + changelog_resume(frame, this); + return -1; + } - afr_changelog_populate_xdata (frame, op, xdata, newloc_xdata); - local->call_count = *call_count; + afr_changelog_populate_xdata(frame, op, xdata, newloc_xdata); + local->call_count = *call_count; - local->transaction.changelog_resume = changelog_resume; - return 0; + local->transaction.changelog_resume = changelog_resume; + return 0; } int -afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, - afr_changelog_resume_t changelog_resume, - afr_xattrop_type_t op) +afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr, + afr_changelog_resume_t changelog_resume, afr_xattrop_type_t op) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - dict_t *xdata = NULL; - dict_t *newloc_xdata = NULL; - int i = 0; - int call_count = 0; - int ret = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + dict_t *xdata 
= NULL; + dict_t *newloc_xdata = NULL; + int i = 0; + int call_count = 0; + int ret = 0; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (local->transaction.changelog_xdata[i]) { - dict_unref (local->transaction.changelog_xdata[i]); - local->transaction.changelog_xdata[i] = NULL; - } + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.changelog_xdata[i]) { + dict_unref(local->transaction.changelog_xdata[i]); + local->transaction.changelog_xdata[i] = NULL; } + } - ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume, - op, &xdata, &newloc_xdata); + ret = afr_changelog_prepare(this, frame, &call_count, changelog_resume, op, + &xdata, &newloc_xdata); - if (ret) - return 0; + if (ret) + return 0; - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i] || - local->transaction.failed_subvols[i]) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.pre_op[i] || + local->transaction.failed_subvols[i]) + continue; - switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - case AFR_METADATA_TRANSACTION: - if (!local->fd) { - STACK_WIND_COOKIE (frame, afr_changelog_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->loc, - GF_XATTROP_ADD_ARRAY, xattr, - xdata); - } else { - STACK_WIND_COOKIE (frame, afr_changelog_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr, - xdata); - } - break; - case AFR_ENTRY_RENAME_TRANSACTION: - - STACK_WIND_COOKIE (frame, afr_changelog_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.new_parent_loc, - GF_XATTROP_ADD_ARRAY, xattr, - newloc_xdata); - call_count--; + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + if (!local->fd) { + STACK_WIND_COOKIE( + frame, afr_changelog_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->xattrop, + &local->loc, GF_XATTROP_ADD_ARRAY, xattr, xdata); + } else { + STACK_WIND_COOKIE( + frame, afr_changelog_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->fxattrop, + local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata); + } + break; + case AFR_ENTRY_RENAME_TRANSACTION: + + STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.new_parent_loc, + GF_XATTROP_ADD_ARRAY, xattr, newloc_xdata); + call_count--; /* fall through */ - case AFR_ENTRY_TRANSACTION: - if (local->fd) - STACK_WIND_COOKIE (frame, afr_changelog_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fxattrop, - local->fd, - GF_XATTROP_ADD_ARRAY, xattr, - xdata); - else - STACK_WIND_COOKIE (frame, afr_changelog_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.parent_loc, - GF_XATTROP_ADD_ARRAY, xattr, - xdata); - break; - } - - if (!--call_count) - break; + case AFR_ENTRY_TRANSACTION: + if (local->fd) + STACK_WIND_COOKIE( + frame, afr_changelog_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->fxattrop, + local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata); + else + STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.parent_loc, + GF_XATTROP_ADD_ARRAY, xattr, xdata); + break; } - if 
(xdata) - dict_unref (xdata); - if (newloc_xdata) - dict_unref (newloc_xdata); - return 0; + if (!--call_count) + break; + } + + if (xdata) + dict_unref(xdata); + if (newloc_xdata) + dict_unref(newloc_xdata); + return 0; } static void -afr_init_optimistic_changelog_for_txn (xlator_t *this, afr_local_t *local) +afr_init_optimistic_changelog_for_txn(xlator_t *this, afr_local_t *local) { - int locked_count = 0; - afr_private_t *priv = NULL; + int locked_count = 0; + afr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - locked_count = AFR_COUNT (local->transaction.pre_op, priv->child_count); - if (priv->optimistic_change_log && locked_count == priv->child_count) - local->optimistic_change_log = 1; + locked_count = AFR_COUNT(local->transaction.pre_op, priv->child_count); + if (priv->optimistic_change_log && locked_count == priv->child_count) + local->optimistic_change_log = 1; - return; + return; } int -afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) -{ - afr_private_t * priv = this->private; - int i = 0; - int ret = 0; - int call_count = 0; - int op_errno = 0; - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - unsigned char *locked_nodes = NULL; - int idx = -1; - gf_boolean_t pre_nop = _gf_true; - dict_t *xdata_req = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - idx = afr_index_for_transaction_type (local->transaction.type); - - locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock); - - for (i = 0; i < priv->child_count; i++) { - if (locked_nodes[i]) { - local->transaction.pre_op[i] = 1; - call_count++; - } else { - local->transaction.failed_subvols[i] = 1; - } - } - - afr_init_optimistic_changelog_for_txn (this, local); - - if (afr_changelog_pre_op_inherit (frame, this)) - goto next; - - /* This condition should not be met with present code, as - * transaction.done will be called if locks are not acquired on even a - * single node. - */ - if (call_count == 0) { - op_errno = ENOTCONN; - goto err; - } - - /* Check if the fop can be performed on at least - * quorum number of nodes. - */ - if (priv->quorum_count && !afr_has_fop_quorum (frame)) { - op_errno = int_lock->lock_op_errno; - if (op_errno == 0) - op_errno = afr_quorum_errno (priv); - goto err; +afr_changelog_pre_op(call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = this->private; + int i = 0; + int ret = 0; + int call_count = 0; + int op_errno = 0; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + unsigned char *locked_nodes = NULL; + int idx = -1; + gf_boolean_t pre_nop = _gf_true; + dict_t *xdata_req = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + idx = afr_index_for_transaction_type(local->transaction.type); + + locked_nodes = afr_locked_nodes_get(local->transaction.type, int_lock); + + for (i = 0; i < priv->child_count; i++) { + if (locked_nodes[i]) { + local->transaction.pre_op[i] = 1; + call_count++; + } else { + local->transaction.failed_subvols[i] = 1; + } + } + + afr_init_optimistic_changelog_for_txn(this, local); + + if (afr_changelog_pre_op_inherit(frame, this)) + goto next; + + /* This condition should not be met with present code, as + * transaction.done will be called if locks are not acquired on even a + * single node. + */ + if (call_count == 0) { + op_errno = ENOTCONN; + goto err; + } + + /* Check if the fop can be performed on at least + * quorum number of nodes. 
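
The pre-op code in this hunk turns the lock results into the transaction membership: every locked brick becomes a participant, unlocked bricks are recorded as failed, and the optimistic-changelog shortcut only applies when every brick holds the lock. A standalone sketch of those two gates with illustrative names.

/* Standalone sketch, not AFR code: derive the pre-op participants and the
 * optimistic-changelog flag from the per-brick lock results. */
#include <stdio.h>

static int
prepare_pre_op(const int *locked, int child_count, int optimistic_enabled,
               int *participant, int *optimistic)
{
    int locked_count = 0;
    int i;

    for (i = 0; i < child_count; i++) {
        participant[i] = locked[i] ? 1 : 0; /* unlocked bricks count as failed */
        if (locked[i])
            locked_count++;
    }

    *optimistic = (optimistic_enabled && locked_count == child_count);
    return locked_count;
}

int
main(void)
{
    int locked[3] = {1, 1, 0};
    int participant[3];
    int optimistic = 0;
    int n = prepare_pre_op(locked, 3, 1, participant, &optimistic);

    printf("participants=%d optimistic=%d\n", n, optimistic); /* 2, 0 */
    return 0;
}
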
+ */ + if (priv->quorum_count && !afr_has_fop_quorum(frame)) { + op_errno = int_lock->lock_op_errno; + if (op_errno == 0) + op_errno = afr_quorum_errno(priv); + goto err; + } + + xdata_req = dict_new(); + if (!xdata_req) { + op_errno = ENOMEM; + goto err; + } + + if (call_count < priv->child_count) + pre_nop = _gf_false; + + /* Set an all-zero pending changelog so that in the cbk, we can get the + * current on-disk values. In a replica 3 volume with arbiter enabled, + * these values are needed to arrive at a go/ no-go of the fop phase to + * avoid ending up in split-brain.*/ + + ret = afr_set_pending_dict(priv, xdata_req, local->pending); + if (ret < 0) { + op_errno = ENOMEM; + goto err; + } + + if (afr_needs_changelog_update(local)) { + local->dirty[idx] = hton32(1); + + ret = dict_set_static_bin(xdata_req, AFR_DIRTY, local->dirty, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) { + op_errno = ENOMEM; + goto err; } - xdata_req = dict_new(); - if (!xdata_req) { - op_errno = ENOMEM; - goto err; - } - - if (call_count < priv->child_count) - pre_nop = _gf_false; - - /* Set an all-zero pending changelog so that in the cbk, we can get the - * current on-disk values. In a replica 3 volume with arbiter enabled, - * these values are needed to arrive at a go/ no-go of the fop phase to - * avoid ending up in split-brain.*/ - - ret = afr_set_pending_dict (priv, xdata_req, local->pending); - if (ret < 0) { - op_errno = ENOMEM; - goto err; - } - - if (afr_needs_changelog_update (local)) { - - local->dirty[idx] = hton32(1); - - ret = dict_set_static_bin (xdata_req, AFR_DIRTY, local->dirty, - sizeof(int) * AFR_NUM_CHANGE_LOGS); - if (ret) { - op_errno = ENOMEM; - goto err; - } + pre_nop = _gf_false; + local->transaction.dirtied = 1; + } - pre_nop = _gf_false; - local->transaction.dirtied = 1; - } + if (pre_nop) + goto next; - if (pre_nop) - goto next; + if (!local->pre_op_compat) { + dict_copy(xdata_req, local->xdata_req); + goto next; + } - if (!local->pre_op_compat) { - dict_copy (xdata_req, local->xdata_req); - goto next; - } + afr_changelog_do(frame, this, xdata_req, afr_transaction_perform_fop, + AFR_TRANSACTION_PRE_OP); - afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop, - AFR_TRANSACTION_PRE_OP); + if (xdata_req) + dict_unref(xdata_req); - if (xdata_req) - dict_unref (xdata_req); - - return 0; + return 0; next: - afr_transaction_perform_fop (frame, this); + afr_transaction_perform_fop(frame, this); - if (xdata_req) - dict_unref (xdata_req); + if (xdata_req) + dict_unref(xdata_req); - return 0; + return 0; err: - local->internal_lock.lock_cbk = afr_transaction_done; - local->op_ret = -1; - local->op_errno = op_errno; + local->internal_lock.lock_cbk = afr_transaction_done; + local->op_ret = -1; + local->op_errno = op_errno; - afr_handle_lock_acquire_failure (local, _gf_true); + afr_handle_lock_acquire_failure(local, _gf_true); - if (xdata_req) - dict_unref (xdata_req); + if (xdata_req) + dict_unref(xdata_req); - return 0; + return 0; } - int -afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) +afr_post_nonblocking_inodelk_cbk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - /* Initiate blocking locks if non-blocking has failed */ - if (int_lock->lock_op_ret < 0) { - gf_msg_debug (this->name, 0, - "Non blocking 
inodelks failed. Proceeding to blocking"); - int_lock->lock_cbk = afr_internal_lock_finish; - afr_blocking_lock (frame, this); - } else { + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_msg_debug(this->name, 0, + "Non blocking inodelks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_internal_lock_finish; + afr_blocking_lock(frame, this); + } else { + gf_msg_debug(this->name, 0, + "Non blocking inodelks done. Proceeding to FOP"); + afr_internal_lock_finish(frame, this); + } - gf_msg_debug (this->name, 0, - "Non blocking inodelks done. Proceeding to FOP"); - afr_internal_lock_finish (frame, this); - } - - return 0; + return 0; } - int -afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) +afr_post_nonblocking_entrylk_cbk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - - local = frame->local; - int_lock = &local->internal_lock; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - /* Initiate blocking locks if non-blocking has failed */ - if (int_lock->lock_op_ret < 0) { - gf_msg_debug (this->name, 0, - "Non blocking entrylks failed. Proceeding to blocking"); - int_lock->lock_cbk = afr_internal_lock_finish; - afr_blocking_lock (frame, this); - } else { + local = frame->local; + int_lock = &local->internal_lock; - gf_msg_debug (this->name, 0, - "Non blocking entrylks done. Proceeding to FOP"); + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_msg_debug(this->name, 0, + "Non blocking entrylks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_internal_lock_finish; + afr_blocking_lock(frame, this); + } else { + gf_msg_debug(this->name, 0, + "Non blocking entrylks done. Proceeding to FOP"); - afr_internal_lock_finish (frame, this); - } + afr_internal_lock_finish(frame, this); + } - return 0; + return 0; } - int -afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this) +afr_post_blocking_rename_cbk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - if (int_lock->lock_op_ret < 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_BLOCKING_LKS_FAILED, - "Blocking entrylks failed."); - - afr_transaction_done (frame, this); - } else { + if (int_lock->lock_op_ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_BLOCKING_LKS_FAILED, + "Blocking entrylks failed."); - gf_msg_debug (this->name, 0, - "Blocking entrylks done. Proceeding to FOP"); + afr_transaction_done(frame, this); + } else { + gf_msg_debug(this->name, 0, + "Blocking entrylks done. 
Proceeding to FOP"); - afr_internal_lock_finish (frame, this); - } - return 0; + afr_internal_lock_finish(frame, this); + } + return 0; } int -afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this) +afr_post_lower_unlock_cbk(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - GF_ASSERT (!int_lock->higher_locked); + GF_ASSERT(!int_lock->higher_locked); - int_lock->lock_cbk = afr_post_blocking_rename_cbk; - afr_blocking_lock (frame, this); + int_lock->lock_cbk = afr_post_blocking_rename_cbk; + afr_blocking_lock(frame, this); - return 0; + return 0; } - int -afr_set_transaction_flock (xlator_t *this, afr_local_t *local) +afr_set_transaction_flock(xlator_t *this, afr_local_t *local) { - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; - int_lock = &local->internal_lock; - priv = this->private; + int_lock = &local->internal_lock; + priv = this->private; - if ((priv->arbiter_count || local->transaction.eager_lock_on || - priv->full_lock) && - local->transaction.type == AFR_DATA_TRANSACTION) { - /*Lock entire file to avoid network split brains.*/ - int_lock->flock.l_len = 0; - int_lock->flock.l_start = 0; - } else { - int_lock->flock.l_len = local->transaction.len; - int_lock->flock.l_start = local->transaction.start; - } - int_lock->flock.l_type = F_WRLCK; + if ((priv->arbiter_count || local->transaction.eager_lock_on || + priv->full_lock) && + local->transaction.type == AFR_DATA_TRANSACTION) { + /*Lock entire file to avoid network split brains.*/ + int_lock->flock.l_len = 0; + int_lock->flock.l_start = 0; + } else { + int_lock->flock.l_len = local->transaction.len; + int_lock->flock.l_start = local->transaction.start; + } + int_lock->flock.l_type = F_WRLCK; - return 0; + return 0; } int -afr_lock (call_frame_t *frame, xlator_t *this) +afr_lock(call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - local = frame->local; - int_lock = &local->internal_lock; + local = frame->local; + int_lock = &local->internal_lock; - int_lock->domain = this->name; + int_lock->domain = this->name; - switch (local->transaction.type) { + switch (local->transaction.type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: - afr_set_transaction_flock (this, local); + afr_set_transaction_flock(this, local); - int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk; + int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk; - afr_nonblocking_inodelk (frame, this); - break; + afr_nonblocking_inodelk(frame, this); + break; case AFR_ENTRY_RENAME_TRANSACTION: - int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk; - afr_nonblocking_entrylk (frame, this); - break; + int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk; + afr_nonblocking_entrylk(frame, this); + break; case AFR_ENTRY_TRANSACTION: - int_lock->lk_basename = local->transaction.basename; - if (local->transaction.parent_loc.path) - int_lock->lk_loc = &local->transaction.parent_loc; - else - GF_ASSERT (local->fd); + int_lock->lk_basename = local->transaction.basename; + if (local->transaction.parent_loc.path) + int_lock->lk_loc = &local->transaction.parent_loc; + else + GF_ASSERT(local->fd); - 
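
afr_set_transaction_flock in this hunk locks the whole file (length 0) for data transactions when an arbiter, eager lock or full-lock is in effect, and otherwise locks only the byte range the fop touches. A standalone sketch of that choice expressed with a plain POSIX struct flock; set_txn_lock_range and the collapsed lock_whole_file flag are illustrative, not AFR's internal lock structure.

/* Standalone sketch, not AFR code: choose the write-lock range for a data
 * transaction. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>

static void
set_txn_lock_range(struct flock *fl, int lock_whole_file, off_t start,
                   off_t len)
{
    fl->l_type = F_WRLCK;
    fl->l_whence = SEEK_SET;
    fl->l_start = lock_whole_file ? 0 : start;
    fl->l_len = lock_whole_file ? 0 : len; /* 0 means "to end of file" */
}

int
main(void)
{
    struct flock fl = {0};

    set_txn_lock_range(&fl, 0, 4096, 131072);
    printf("start=%lld len=%lld\n", (long long)fl.l_start,
           (long long)fl.l_len);
    return 0;
}
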
int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk; - afr_nonblocking_entrylk (frame, this); - break; - } + int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk; + afr_nonblocking_entrylk(frame, this); + break; + } - return 0; + return 0; } static gf_boolean_t -afr_locals_overlap (afr_local_t *local1, afr_local_t *local2) +afr_locals_overlap(afr_local_t *local1, afr_local_t *local2) { - uint64_t start1 = local1->transaction.start; - uint64_t start2 = local2->transaction.start; - uint64_t end1 = 0; - uint64_t end2 = 0; + uint64_t start1 = local1->transaction.start; + uint64_t start2 = local2->transaction.start; + uint64_t end1 = 0; + uint64_t end2 = 0; - if (local1->transaction.len) - end1 = start1 + local1->transaction.len - 1; - else - end1 = ULLONG_MAX; + if (local1->transaction.len) + end1 = start1 + local1->transaction.len - 1; + else + end1 = ULLONG_MAX; - if (local2->transaction.len) - end2 = start2 + local2->transaction.len - 1; - else - end2 = ULLONG_MAX; + if (local2->transaction.len) + end2 = start2 + local2->transaction.len - 1; + else + end2 = ULLONG_MAX; - return ((end1 >= start2) && (end2 >= start1)); + return ((end1 >= start2) && (end2 >= start1)); } gf_boolean_t -afr_has_lock_conflict (afr_local_t *local, gf_boolean_t waitlist_check) -{ - afr_local_t *each = NULL; - afr_lock_t *lock = NULL; - - lock = &local->inode_ctx->lock[local->transaction.type]; - /* - * Once full file lock is acquired in eager-lock phase, overlapping - * writes do not compete for inode-locks, instead are transferred to the - * next writes. Because of this overlapping writes are not ordered. - * This can cause inconsistencies in replication. - * Example: - * Two overlapping writes w1, w2 are sent in parallel on same fd - * in two threads t1, t2. - * Both threads can execute afr_writev_wind in the following manner. - * t1 winds w1 on brick-0 - * t2 winds w2 on brick-0 - * t2 winds w2 on brick-1 - * t1 winds w1 on brick-1 - * - * This check makes sure the locks are not transferred for - * overlapping writes. - */ - list_for_each_entry (each, &lock->owners, transaction.owner_list) { - if (afr_locals_overlap (each, local)) { - return _gf_true; - } - } - - if (!waitlist_check) - return _gf_false; - list_for_each_entry (each, &lock->waiting, transaction.wait_list) { - if (afr_locals_overlap (each, local)) { - return _gf_true; - } - } +afr_has_lock_conflict(afr_local_t *local, gf_boolean_t waitlist_check) +{ + afr_local_t *each = NULL; + afr_lock_t *lock = NULL; + + lock = &local->inode_ctx->lock[local->transaction.type]; + /* + * Once full file lock is acquired in eager-lock phase, overlapping + * writes do not compete for inode-locks, instead are transferred to the + * next writes. Because of this overlapping writes are not ordered. + * This can cause inconsistencies in replication. + * Example: + * Two overlapping writes w1, w2 are sent in parallel on same fd + * in two threads t1, t2. + * Both threads can execute afr_writev_wind in the following manner. + * t1 winds w1 on brick-0 + * t2 winds w2 on brick-0 + * t2 winds w2 on brick-1 + * t1 winds w1 on brick-1 + * + * This check makes sure the locks are not transferred for + * overlapping writes. 
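
The overlap test referred to in the comment above treats a length of 0 as "to end of file" and declares a conflict whenever the two byte ranges intersect, which is what keeps overlapping writes from inheriting the eager lock out of order. A standalone sketch of the same arithmetic with illustrative names.

/* Standalone sketch, not AFR code: do two write ranges overlap? A length of
 * zero means the range extends to the end of the file. */
#include <limits.h>
#include <stdio.h>

static int
ranges_overlap(unsigned long long start1, unsigned long long len1,
               unsigned long long start2, unsigned long long len2)
{
    unsigned long long end1 = len1 ? start1 + len1 - 1 : ULLONG_MAX;
    unsigned long long end2 = len2 ? start2 + len2 - 1 : ULLONG_MAX;

    return (end1 >= start2) && (end2 >= start1);
}

int
main(void)
{
    /* two 128KB writes, the second starting inside the first: conflict */
    printf("%d\n", ranges_overlap(0, 131072, 65536, 131072)); /* 1 */
    /* disjoint 4KB writes: no conflict */
    printf("%d\n", ranges_overlap(0, 4096, 8192, 4096)); /* 0 */
    return 0;
}
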
+ */ + list_for_each_entry(each, &lock->owners, transaction.owner_list) + { + if (afr_locals_overlap(each, local)) { + return _gf_true; + } + } + + if (!waitlist_check) return _gf_false; + list_for_each_entry(each, &lock->waiting, transaction.wait_list) + { + if (afr_locals_overlap(each, local)) { + return _gf_true; + } + } + return _gf_false; } - /* }}} */ static void -afr_copy_inodelk_vars (afr_internal_lock_t *dst, afr_internal_lock_t *src, - xlator_t *this) +afr_copy_inodelk_vars(afr_internal_lock_t *dst, afr_internal_lock_t *src, + xlator_t *this) { - afr_private_t *priv = this->private; + afr_private_t *priv = this->private; - dst->domain = src->domain; - dst->flock.l_len = src->flock.l_len; - dst->flock.l_start = src->flock.l_start; - dst->flock.l_type = src->flock.l_type; - dst->lock_count = src->lock_count; - memcpy (dst->locked_nodes, src->locked_nodes, - priv->child_count * sizeof (*dst->locked_nodes)); + dst->domain = src->domain; + dst->flock.l_len = src->flock.l_len; + dst->flock.l_start = src->flock.l_start; + dst->flock.l_type = src->flock.l_type; + dst->lock_count = src->lock_count; + memcpy(dst->locked_nodes, src->locked_nodes, + priv->child_count * sizeof(*dst->locked_nodes)); } void -__afr_transaction_wake_shared (afr_local_t *local, struct list_head *shared) -{ - gf_boolean_t conflict = _gf_false; - afr_local_t *each = NULL; - afr_lock_t *lock = &local->inode_ctx->lock[local->transaction.type]; - - while (!conflict) { - if (list_empty (&lock->waiting)) - return; - each = list_entry(lock->waiting.next, afr_local_t, - transaction.wait_list); - if (afr_has_lock_conflict (each, _gf_false)) { - conflict = _gf_true; - } - if (conflict && !list_empty (&lock->owners)) - return; - afr_copy_inodelk_vars (&each->internal_lock, - &local->internal_lock, - each->transaction.frame->this); - list_move_tail (&each->transaction.wait_list, shared); - list_add_tail(&each->transaction.owner_list, &lock->owners); +__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared) +{ + gf_boolean_t conflict = _gf_false; + afr_local_t *each = NULL; + afr_lock_t *lock = &local->inode_ctx->lock[local->transaction.type]; + + while (!conflict) { + if (list_empty(&lock->waiting)) + return; + each = list_entry(lock->waiting.next, afr_local_t, + transaction.wait_list); + if (afr_has_lock_conflict(each, _gf_false)) { + conflict = _gf_true; } + if (conflict && !list_empty(&lock->owners)) + return; + afr_copy_inodelk_vars(&each->internal_lock, &local->internal_lock, + each->transaction.frame->this); + list_move_tail(&each->transaction.wait_list, shared); + list_add_tail(&each->transaction.owner_list, &lock->owners); + } } static void -afr_lock_resume_shared (struct list_head *list) +afr_lock_resume_shared(struct list_head *list) { - afr_local_t *each = NULL; + afr_local_t *each = NULL; - while (!list_empty(list)) { - each = list_entry(list->next, afr_local_t, - transaction.wait_list); - list_del_init(&each->transaction.wait_list); - afr_changelog_pre_op (each->transaction.frame, - each->transaction.frame->this); - } + while (!list_empty(list)) { + each = list_entry(list->next, afr_local_t, transaction.wait_list); + list_del_init(&each->transaction.wait_list); + afr_changelog_pre_op(each->transaction.frame, + each->transaction.frame->this); + } } int -afr_internal_lock_finish (call_frame_t *frame, xlator_t *this) +afr_internal_lock_finish(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = frame->local; - afr_lock_t *lock = NULL; + afr_local_t *local = frame->local; + afr_lock_t *lock 
= NULL; - - local->internal_lock.lock_cbk = NULL; - if (!local->transaction.eager_lock_on) { - if (local->internal_lock.lock_op_ret < 0) { - afr_transaction_done (frame, this); - return 0; - } - afr_changelog_pre_op (frame, this); + local->internal_lock.lock_cbk = NULL; + if (!local->transaction.eager_lock_on) { + if (local->internal_lock.lock_op_ret < 0) { + afr_transaction_done(frame, this); + return 0; + } + afr_changelog_pre_op(frame, this); + } else { + lock = &local->inode_ctx->lock[local->transaction.type]; + if (local->internal_lock.lock_op_ret < 0) { + afr_handle_lock_acquire_failure(local, _gf_false); } else { - lock = &local->inode_ctx->lock[local->transaction.type]; - if (local->internal_lock.lock_op_ret < 0) { - afr_handle_lock_acquire_failure (local, _gf_false); - } else { - lock->event_generation = local->event_generation; - afr_changelog_pre_op (frame, this); - } + lock->event_generation = local->event_generation; + afr_changelog_pre_op(frame, this); } + } - return 0; + return 0; } gf_boolean_t -afr_are_multiple_fds_opened (afr_local_t *local, xlator_t *this) +afr_are_multiple_fds_opened(afr_local_t *local, xlator_t *this) { - /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock - * is taken mount2 opened the same file, it won't be able to - * perform any data operations until mount1 releases eager-lock. - * To avoid such scenario do not enable eager-lock for this transaction - * if open-fd-count is > 1 - */ + /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock + * is taken mount2 opened the same file, it won't be able to + * perform any data operations until mount1 releases eager-lock. + * To avoid such scenario do not enable eager-lock for this transaction + * if open-fd-count is > 1 + */ - if (local->inode_ctx->open_fd_count > 1) - return _gf_true; + if (local->inode_ctx->open_fd_count > 1) + return _gf_true; - return _gf_false; + return _gf_false; } - gf_boolean_t -afr_is_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this, - int delay) -{ - afr_local_t *local = NULL; - afr_lock_t *lock = NULL; - gf_boolean_t res = _gf_false; - - local = frame->local; - lock = &local->inode_ctx->lock[local->transaction.type]; - - if (!afr_txn_nothing_failed (frame, this)) { - lock->release = _gf_true; - goto out; - } - - if (afr_are_multiple_fds_opened (local, this)) { - lock->release = _gf_true; - goto out; - } - - if (!list_empty (&lock->owners)) - goto out; - else - GF_ASSERT (list_empty (&lock->waiting)); - - if (lock->release) { - goto out; - } - - if (!delay) { - goto out; - } - - if ((local->op != GF_FOP_WRITE) && - (local->op != GF_FOP_FXATTROP)) { - /*Only allow writes but shard does [f]xattrops on writes, so - * they are fine too*/ - goto out; - } - - res = _gf_true; +afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this, + int delay) +{ + afr_local_t *local = NULL; + afr_lock_t *lock = NULL; + gf_boolean_t res = _gf_false; + + local = frame->local; + lock = &local->inode_ctx->lock[local->transaction.type]; + + if (!afr_txn_nothing_failed(frame, this)) { + lock->release = _gf_true; + goto out; + } + + if (afr_are_multiple_fds_opened(local, this)) { + lock->release = _gf_true; + goto out; + } + + if (!list_empty(&lock->owners)) + goto out; + else + GF_ASSERT(list_empty(&lock->waiting)); + + if (lock->release) { + goto out; + } + + if (!delay) { + goto out; + } + + if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) { + /*Only allow writes but shard does [f]xattrops on writes, so + * they 
are fine too*/ + goto out; + } + + res = _gf_true; out: - return res; + return res; } - void -afr_delayed_changelog_wake_up_cbk (void *data) -{ - afr_lock_t *lock = NULL; - afr_local_t *local = data; - afr_local_t *timer_local = NULL; - struct list_head shared; - - INIT_LIST_HEAD (&shared); - lock = &local->inode_ctx->lock[local->transaction.type]; - LOCK (&local->inode->lock); - { - timer_local = list_entry(lock->post_op.next, - afr_local_t, - transaction.owner_list); - if (list_empty (&lock->owners) && (local == timer_local)) { - GF_ASSERT (list_empty (&lock->waiting)); - /*Last owner*/ - lock->release = _gf_true; - lock->delay_timer = NULL; - } - } - UNLOCK (&local->inode->lock); - afr_changelog_post_op_now (local->transaction.frame, - local->transaction.frame->this); +afr_delayed_changelog_wake_up_cbk(void *data) +{ + afr_lock_t *lock = NULL; + afr_local_t *local = data; + afr_local_t *timer_local = NULL; + struct list_head shared; + + INIT_LIST_HEAD(&shared); + lock = &local->inode_ctx->lock[local->transaction.type]; + LOCK(&local->inode->lock); + { + timer_local = list_entry(lock->post_op.next, afr_local_t, + transaction.owner_list); + if (list_empty(&lock->owners) && (local == timer_local)) { + GF_ASSERT(list_empty(&lock->waiting)); + /*Last owner*/ + lock->release = _gf_true; + lock->delay_timer = NULL; + } + } + UNLOCK(&local->inode->lock); + afr_changelog_post_op_now(local->transaction.frame, + local->transaction.frame->this); } - /* SET operation */ int -afr_fd_report_unstable_write (xlator_t *this, afr_local_t *local) +afr_fd_report_unstable_write(xlator_t *this, afr_local_t *local) { - LOCK(&local->inode->lock); - { - local->inode_ctx->witnessed_unstable_write = _gf_true; - } - UNLOCK(&local->inode->lock); + LOCK(&local->inode->lock); + { + local->inode_ctx->witnessed_unstable_write = _gf_true; + } + UNLOCK(&local->inode->lock); - return 0; + return 0; } /* TEST and CLEAR operation */ gf_boolean_t -afr_fd_has_witnessed_unstable_write (xlator_t *this, inode_t *inode) +afr_fd_has_witnessed_unstable_write(xlator_t *this, inode_t *inode) { - afr_inode_ctx_t *ctx = NULL; - gf_boolean_t witness = _gf_false; + afr_inode_ctx_t *ctx = NULL; + gf_boolean_t witness = _gf_false; - LOCK(&inode->lock); - { - (void)__afr_inode_ctx_get (this, inode, &ctx); + LOCK(&inode->lock); + { + (void)__afr_inode_ctx_get(this, inode, &ctx); - if (ctx->witnessed_unstable_write) { - witness = _gf_true; - ctx->witnessed_unstable_write = _gf_false; - } + if (ctx->witnessed_unstable_write) { + witness = _gf_true; + ctx->witnessed_unstable_write = _gf_false; } - UNLOCK (&inode->lock); + } + UNLOCK(&inode->lock); - return witness; + return witness; } - int -afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) +afr_changelog_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) { - afr_private_t *priv = NULL; - int child_index = (long) cookie; - int call_count = -1; - afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int child_index = (long)cookie; + int call_count = -1; + afr_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - if (op_ret != 0) { - /* Failure of fsync() is as good as failure of previous - write(). So treat it like one. 
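
The SET and TEST-and-CLEAR pair reformatted in this hunk records that an unstable write went through on an inode and then consumes that fact exactly once under the inode lock, so the post-op path knows whether an fsync is still owed. A standalone sketch of the pattern using a plain pthread mutex; struct inode_ctx here is illustrative, not AFR's inode context.

/* Standalone sketch, not AFR code: set a witness flag under a lock, and
 * read-and-clear it atomically under the same lock. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct inode_ctx {
    pthread_mutex_t lock;
    bool witnessed_unstable_write;
};

static void
report_unstable_write(struct inode_ctx *ctx)
{
    pthread_mutex_lock(&ctx->lock);
    ctx->witnessed_unstable_write = true;
    pthread_mutex_unlock(&ctx->lock);
}

static bool
test_and_clear_unstable_write(struct inode_ctx *ctx)
{
    bool witness;

    pthread_mutex_lock(&ctx->lock);
    witness = ctx->witnessed_unstable_write;
    ctx->witnessed_unstable_write = false;
    pthread_mutex_unlock(&ctx->lock);

    return witness;
}

int
main(void)
{
    struct inode_ctx ctx = {PTHREAD_MUTEX_INITIALIZER, false};

    report_unstable_write(&ctx);
    printf("%d %d\n", test_and_clear_unstable_write(&ctx),
           test_and_clear_unstable_write(&ctx)); /* prints "1 0" */
    return 0;
}
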
- */ - gf_msg (this->name, GF_LOG_WARNING, - op_errno, AFR_MSG_FSYNC_FAILED, - "fsync(%s) failed on subvolume %s. Transaction was %s", - uuid_utoa (local->fd->inode->gfid), - priv->children[child_index]->name, - gf_fop_list[local->op]); + if (op_ret != 0) { + /* Failure of fsync() is as good as failure of previous + write(). So treat it like one. + */ + gf_msg(this->name, GF_LOG_WARNING, op_errno, AFR_MSG_FSYNC_FAILED, + "fsync(%s) failed on subvolume %s. Transaction was %s", + uuid_utoa(local->fd->inode->gfid), + priv->children[child_index]->name, gf_fop_list[local->op]); - afr_transaction_fop_failed (frame, this, child_index); - } + afr_transaction_fop_failed(frame, this, child_index); + } - call_count = afr_frame_return (frame); + call_count = afr_frame_return(frame); - if (call_count == 0) - afr_changelog_post_op_now (frame, this); + if (call_count == 0) + afr_changelog_post_op_now(frame, this); - return 0; + return 0; } - int -afr_changelog_fsync (call_frame_t *frame, xlator_t *this) +afr_changelog_fsync(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - int i = 0; - int call_count = 0; - afr_private_t *priv = NULL; - dict_t *xdata = NULL; - GF_UNUSED int ret = -1; + afr_local_t *local = NULL; + int i = 0; + int call_count = 0; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + GF_UNUSED int ret = -1; - local = frame->local; - priv = this->private; + local = frame->local; + priv = this->private; - call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count); + call_count = AFR_COUNT(local->transaction.pre_op, priv->child_count); - if (!call_count) { - /* will go straight to unlock */ - afr_changelog_post_op_now (frame, this); - return 0; - } + if (!call_count) { + /* will go straight to unlock */ + afr_changelog_post_op_now(frame, this); + return 0; + } - local->call_count = call_count; + local->call_count = call_count; - xdata = dict_new(); - if (xdata) - ret = dict_set_int32 (xdata, "batch-fsync", 1); + xdata = dict_new(); + if (xdata) + ret = dict_set_int32(xdata, "batch-fsync", 1); - for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i]) - continue; + for (i = 0; i < priv->child_count; i++) { + if (!local->transaction.pre_op[i]) + continue; - STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->fsync, local->fd, - 1, xdata); - if (!--call_count) - break; - } + STACK_WIND_COOKIE(frame, afr_changelog_fsync_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->fsync, + local->fd, 1, xdata); + if (!--call_count) + break; + } - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref(xdata); - return 0; + return 0; } - int -afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this) +afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - priv = this->private; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) { - afr_changelog_post_op_now (frame, this); - return 0; - } + local = frame->local; + priv = this->private; - if (afr_changelog_pre_op_uninherit (frame, this) && - afr_txn_nothing_failed (frame, this)) { - /* just detected that this post-op is about to - be optimized away as a new write() has - already piggybacked on this frame's changelog. 
- */ - afr_changelog_post_op_now (frame, this); - return 0; - } + if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) { + afr_changelog_post_op_now(frame, this); + return 0; + } - /* Calling afr_changelog_post_op_now() now will result in - issuing ->[f]xattrop(). - - Performing a hard POST-OP (->[f]xattrop() FOP) is a more - responsible operation that what it might appear on the surface. - - The changelog of a file (in the xattr of the file on the server) - stores information (pending count) about the state of the file - on the OTHER server. This changelog is blindly trusted, and must - therefore be updated in such a way it remains trustworthy. This - implies that decrementing the pending count (essentially "clearing - the dirty flag") must be done STRICTLY after we are sure that the - operation on the other server has reached stable storage. - - While the backend filesystem on that server will eventually flush - it to stable storage, we (being in userspace) have no mechanism - to get notified when the write became "stable". - - This means we need take matter into our own hands and issue an - fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES, - and get an acknowledgement for it. And we need to wait for the - fsync() acknowledgement before initiating the hard POST-OP. - - However if the FD itself was opened in O_SYNC or O_DSYNC then - we are already guaranteed that the writes were made stable as - part of the FOP itself. The same holds true for NFS stable - writes which happen on an anonymous FD with O_DSYNC or O_SYNC - flag set in the writev() @flags param. For all other write types, - mark a flag in the fdctx whenever an unstable write is witnessed. + if (afr_changelog_pre_op_uninherit(frame, this) && + afr_txn_nothing_failed(frame, this)) { + /* just detected that this post-op is about to + be optimized away as a new write() has + already piggybacked on this frame's changelog. */ + afr_changelog_post_op_now(frame, this); + return 0; + } + + /* Calling afr_changelog_post_op_now() now will result in + issuing ->[f]xattrop(). + + Performing a hard POST-OP (->[f]xattrop() FOP) is a more + responsible operation that what it might appear on the surface. + + The changelog of a file (in the xattr of the file on the server) + stores information (pending count) about the state of the file + on the OTHER server. This changelog is blindly trusted, and must + therefore be updated in such a way it remains trustworthy. This + implies that decrementing the pending count (essentially "clearing + the dirty flag") must be done STRICTLY after we are sure that the + operation on the other server has reached stable storage. + + While the backend filesystem on that server will eventually flush + it to stable storage, we (being in userspace) have no mechanism + to get notified when the write became "stable". + + This means we need take matter into our own hands and issue an + fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES, + and get an acknowledgement for it. And we need to wait for the + fsync() acknowledgement before initiating the hard POST-OP. + + However if the FD itself was opened in O_SYNC or O_DSYNC then + we are already guaranteed that the writes were made stable as + part of the FOP itself. The same holds true for NFS stable + writes which happen on an anonymous FD with O_DSYNC or O_SYNC + flag set in the writev() @flags param. For all other write types, + mark a flag in the fdctx whenever an unstable write is witnessed. 
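The reasoning above reduces to a small decision: when no unstable write was witnessed on the inode (the writes were already stable through O_SYNC/O_DSYNC or NFS stable writes), the hard post-op may run immediately; otherwise an explicit fsync has to complete first whenever durability is requested. A compact standalone sketch of that decision follows; the struct, the helpers and main() are illustrative stand-ins, not the functions this patch touches.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the state consulted above. */
struct txn_state {
    bool unstable_write_witnessed; /* set when a cached/unstable write was seen */
    bool ensure_durability;        /* the ensure-durability option */
};

static void
post_op_now(struct txn_state *t)
{
    (void)t;
    printf("decrement pending counters via [f]xattrop\n");
}

static void
fsync_then_post_op(struct txn_state *t)
{
    printf("fsync() the file and wait for the acknowledgement\n");
    post_op_now(t); /* in AFR this continuation runs in the fsync callback */
}

/* Decide whether the hard post-op may run right away or must wait
 * behind an explicit fsync. */
static void
post_op_safe(struct txn_state *t)
{
    if (!t->unstable_write_witnessed) {
        /* Writes already reached stable storage, so the changelog can be
         * updated immediately. */
        post_op_now(t);
        return;
    }

    if (t->ensure_durability)
        fsync_then_post_op(t);
    else
        post_op_now(t);
}

int
main(void)
{
    struct txn_state t = {.unstable_write_witnessed = true,
                          .ensure_durability = true};

    post_op_safe(&t);
    return 0;
}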
+ */ + + if (!afr_fd_has_witnessed_unstable_write(this, local->inode)) { + afr_changelog_post_op_now(frame, this); + return 0; + } - if (!afr_fd_has_witnessed_unstable_write (this, local->inode)) { - afr_changelog_post_op_now (frame, this); - return 0; - } - - /* Check whether users want durability and perform fsync/post-op - * accordingly. - */ - if (priv->ensure_durability) { - /* Time to fsync() */ - afr_changelog_fsync (frame, this); - } else { - afr_changelog_post_op_now (frame, this); - } + /* Check whether users want durability and perform fsync/post-op + * accordingly. + */ + if (priv->ensure_durability) { + /* Time to fsync() */ + afr_changelog_fsync(frame, this); + } else { + afr_changelog_post_op_now(frame, this); + } - return 0; + return 0; } void -afr_changelog_post_op (call_frame_t *frame, xlator_t *this) -{ - struct timespec delta = {0, }; - afr_private_t *priv = NULL; - afr_local_t *local = frame->local; - afr_lock_t *lock = NULL; - gf_boolean_t post_op = _gf_true; - struct list_head shared; - - priv = this->private; - delta.tv_sec = priv->post_op_delay_secs; - delta.tv_nsec = 0; - - INIT_LIST_HEAD (&shared); - if (!local->transaction.eager_lock_on) - goto out; - - lock = &local->inode_ctx->lock[local->transaction.type]; - LOCK (&local->inode->lock); - { - list_del_init (&local->transaction.owner_list); - list_add (&local->transaction.owner_list, &lock->post_op); - __afr_transaction_wake_shared (local, &shared); - - if (!afr_is_delayed_changelog_post_op_needed (frame, this, - delta.tv_sec)) { - if (list_empty (&lock->owners)) - lock->release = _gf_true; - goto unlock; - } - - GF_ASSERT (lock->delay_timer == NULL); - lock->delay_timer = gf_timer_call_after (this->ctx, delta, - afr_delayed_changelog_wake_up_cbk, - local); - if (!lock->delay_timer) { - lock->release = _gf_true; - } else { - post_op = _gf_false; - } +afr_changelog_post_op(call_frame_t *frame, xlator_t *this) +{ + struct timespec delta = { + 0, + }; + afr_private_t *priv = NULL; + afr_local_t *local = frame->local; + afr_lock_t *lock = NULL; + gf_boolean_t post_op = _gf_true; + struct list_head shared; + + priv = this->private; + delta.tv_sec = priv->post_op_delay_secs; + delta.tv_nsec = 0; + + INIT_LIST_HEAD(&shared); + if (!local->transaction.eager_lock_on) + goto out; + + lock = &local->inode_ctx->lock[local->transaction.type]; + LOCK(&local->inode->lock); + { + list_del_init(&local->transaction.owner_list); + list_add(&local->transaction.owner_list, &lock->post_op); + __afr_transaction_wake_shared(local, &shared); + + if (!afr_is_delayed_changelog_post_op_needed(frame, this, + delta.tv_sec)) { + if (list_empty(&lock->owners)) + lock->release = _gf_true; + goto unlock; + } - } + GF_ASSERT(lock->delay_timer == NULL); + lock->delay_timer = gf_timer_call_after( + this->ctx, delta, afr_delayed_changelog_wake_up_cbk, local); + if (!lock->delay_timer) { + lock->release = _gf_true; + } else { + post_op = _gf_false; + } + } unlock: - UNLOCK (&local->inode->lock); + UNLOCK(&local->inode->lock); - if (!list_empty (&shared)) { - afr_lock_resume_shared (&shared); - } + if (!list_empty(&shared)) { + afr_lock_resume_shared(&shared); + } out: - if (post_op) { - if (!local->transaction.eager_lock_on || lock->release) { - afr_changelog_post_op_safe (frame, this); - } else { - afr_changelog_post_op_now (frame, this); - } + if (post_op) { + if (!local->transaction.eager_lock_on || lock->release) { + afr_changelog_post_op_safe(frame, this); + } else { + afr_changelog_post_op_now(frame, this); } + } } int -afr_transaction_resume 
(call_frame_t *frame, xlator_t *this) +afr_transaction_resume(call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - afr_restore_lk_owner (frame); + afr_restore_lk_owner(frame); - afr_handle_symmetric_errors (frame, this); + afr_handle_symmetric_errors(frame, this); - if (!local->pre_op_compat) - /* new mode, pre-op was done along - with OP */ - afr_changelog_pre_op_update (frame, this); + if (!local->pre_op_compat) + /* new mode, pre-op was done along + with OP */ + afr_changelog_pre_op_update(frame, this); - afr_changelog_post_op (frame, this); + afr_changelog_post_op(frame, this); - return 0; + return 0; } - /** * afr_transaction_fop_failed - inform that an fop failed */ void -afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, - int child_index) +afr_transaction_fop_failed(call_frame_t *frame, xlator_t *this, int child_index) { - afr_local_t * local = NULL; + afr_local_t *local = NULL; - local = frame->local; + local = frame->local; - local->transaction.failed_subvols[child_index] = 1; + local->transaction.failed_subvols[child_index] = 1; } static gf_boolean_t -__need_previous_lock_unlocked (afr_local_t *local) +__need_previous_lock_unlocked(afr_local_t *local) { - afr_lock_t *lock = NULL; + afr_lock_t *lock = NULL; - if (!local->transaction.eager_lock_on) - return _gf_true; + if (!local->transaction.eager_lock_on) + return _gf_true; - lock = &local->inode_ctx->lock[local->transaction.type]; - if (!lock->acquired) - return _gf_false; - if (lock->acquired && lock->event_generation != local->event_generation) - return _gf_true; + lock = &local->inode_ctx->lock[local->transaction.type]; + if (!lock->acquired) return _gf_false; + if (lock->acquired && lock->event_generation != local->event_generation) + return _gf_true; + return _gf_false; } void -__afr_eager_lock_handle (afr_local_t *local, gf_boolean_t *take_lock, - gf_boolean_t *do_pre_op, afr_local_t **timer_local) -{ - afr_lock_t *lock = NULL; - afr_local_t *owner_local = NULL; - xlator_t *this = local->transaction.frame->this; - - if (local->fd && !afr_are_multiple_fds_opened (local, this)) { - local->transaction.eager_lock_on = _gf_true; - afr_set_lk_owner (local->transaction.frame, this, local->inode); - } - - lock = &local->inode_ctx->lock[local->transaction.type]; - if (__need_previous_lock_unlocked (local)) { - if (!list_empty (&lock->owners)) { - lock->release = _gf_true; - } else if (lock->delay_timer) { - lock->release = _gf_true; - if (gf_timer_call_cancel (this->ctx, - lock->delay_timer)) { - /* It will be put in frozen list - * in the code flow below*/ - } else { - *timer_local = list_entry(lock->post_op.next, - afr_local_t, - transaction.owner_list); - lock->delay_timer = NULL; - } - } - if (!local->transaction.eager_lock_on) - goto out; - } - - if (lock->release) { - list_add_tail (&local->transaction.wait_list, - &lock->frozen); - *take_lock = _gf_false; - goto out; - } - - if (lock->delay_timer) { - *take_lock = _gf_false; - if (gf_timer_call_cancel (this->ctx, - lock->delay_timer)) { - list_add_tail (&local->transaction.wait_list, - &lock->frozen); - } else { - *timer_local = list_entry(lock->post_op.next, - afr_local_t, - transaction.owner_list); - afr_copy_inodelk_vars (&local->internal_lock, - &(*timer_local)->internal_lock, - this); - lock->delay_timer = NULL; - *do_pre_op = _gf_true; - list_add_tail (&local->transaction.owner_list, - &lock->owners); - } - goto out; - } - - if (!list_empty (&lock->owners)) { - 
if (!lock->acquired || - afr_has_lock_conflict (local, _gf_true)) { - list_add_tail (&local->transaction.wait_list, - &lock->waiting); - *take_lock = _gf_false; - goto out; - } - owner_local = list_entry (lock->owners.next, - afr_local_t, +__afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock, + gf_boolean_t *do_pre_op, afr_local_t **timer_local) +{ + afr_lock_t *lock = NULL; + afr_local_t *owner_local = NULL; + xlator_t *this = local->transaction.frame->this; + + if (local->fd && !afr_are_multiple_fds_opened(local, this)) { + local->transaction.eager_lock_on = _gf_true; + afr_set_lk_owner(local->transaction.frame, this, local->inode); + } + + lock = &local->inode_ctx->lock[local->transaction.type]; + if (__need_previous_lock_unlocked(local)) { + if (!list_empty(&lock->owners)) { + lock->release = _gf_true; + } else if (lock->delay_timer) { + lock->release = _gf_true; + if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) { + /* It will be put in frozen list + * in the code flow below*/ + } else { + *timer_local = list_entry(lock->post_op.next, afr_local_t, transaction.owner_list); - afr_copy_inodelk_vars (&local->internal_lock, - &owner_local->internal_lock, - this); - *take_lock = _gf_false; - *do_pre_op = _gf_true; + lock->delay_timer = NULL; + } } - - if (lock->acquired) - GF_ASSERT (!(*take_lock)); - list_add_tail (&local->transaction.owner_list, &lock->owners); + if (!local->transaction.eager_lock_on) + goto out; + } + + if (lock->release) { + list_add_tail(&local->transaction.wait_list, &lock->frozen); + *take_lock = _gf_false; + goto out; + } + + if (lock->delay_timer) { + *take_lock = _gf_false; + if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) { + list_add_tail(&local->transaction.wait_list, &lock->frozen); + } else { + *timer_local = list_entry(lock->post_op.next, afr_local_t, + transaction.owner_list); + afr_copy_inodelk_vars(&local->internal_lock, + &(*timer_local)->internal_lock, this); + lock->delay_timer = NULL; + *do_pre_op = _gf_true; + list_add_tail(&local->transaction.owner_list, &lock->owners); + } + goto out; + } + + if (!list_empty(&lock->owners)) { + if (!lock->acquired || afr_has_lock_conflict(local, _gf_true)) { + list_add_tail(&local->transaction.wait_list, &lock->waiting); + *take_lock = _gf_false; + goto out; + } + owner_local = list_entry(lock->owners.next, afr_local_t, + transaction.owner_list); + afr_copy_inodelk_vars(&local->internal_lock, + &owner_local->internal_lock, this); + *take_lock = _gf_false; + *do_pre_op = _gf_true; + } + + if (lock->acquired) + GF_ASSERT(!(*take_lock)); + list_add_tail(&local->transaction.owner_list, &lock->owners); out: - return; + return; } void -afr_transaction_start (afr_local_t *local, xlator_t *this) +afr_transaction_start(afr_local_t *local, xlator_t *this) { - afr_private_t *priv = NULL; - gf_boolean_t take_lock = _gf_true; - gf_boolean_t do_pre_op = _gf_false; - afr_local_t *timer_local = NULL; + afr_private_t *priv = NULL; + gf_boolean_t take_lock = _gf_true; + gf_boolean_t do_pre_op = _gf_false; + afr_local_t *timer_local = NULL; - priv = this->private; + priv = this->private; - if (local->transaction.type != AFR_DATA_TRANSACTION && - local->transaction.type != AFR_METADATA_TRANSACTION) - goto lock_phase; + if (local->transaction.type != AFR_DATA_TRANSACTION && + local->transaction.type != AFR_METADATA_TRANSACTION) + goto lock_phase; - if (!priv->eager_lock) - goto lock_phase; + if (!priv->eager_lock) + goto lock_phase; - LOCK (&local->inode->lock); - { - __afr_eager_lock_handle (local, 
&take_lock, &do_pre_op, - &timer_local); - } - UNLOCK (&local->inode->lock); + LOCK(&local->inode->lock); + { + __afr_eager_lock_handle(local, &take_lock, &do_pre_op, &timer_local); + } + UNLOCK(&local->inode->lock); lock_phase: - if (!local->transaction.eager_lock_on) { - afr_set_lk_owner (local->transaction.frame, this, - local->transaction.frame->root); - } - - - if (take_lock) { - afr_lock (local->transaction.frame, this); - } else if (do_pre_op) { - afr_changelog_pre_op (local->transaction.frame, this); - } - /*Always call delayed_changelog_wake_up_cbk after calling pre-op above - * so that any inheriting can happen*/ - if (timer_local) - afr_delayed_changelog_wake_up_cbk (timer_local); + if (!local->transaction.eager_lock_on) { + afr_set_lk_owner(local->transaction.frame, this, + local->transaction.frame->root); + } + + if (take_lock) { + afr_lock(local->transaction.frame, this); + } else if (do_pre_op) { + afr_changelog_pre_op(local->transaction.frame, this); + } + /*Always call delayed_changelog_wake_up_cbk after calling pre-op above + * so that any inheriting can happen*/ + if (timer_local) + afr_delayed_changelog_wake_up_cbk(timer_local); } int -afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +afr_write_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) { - afr_local_t *local = frame->local; + afr_local_t *local = frame->local; - if (err) { - AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err); - goto fail; - } + if (err) { + AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err); + goto fail; + } - afr_transaction_start (local, this); - return 0; + afr_transaction_start(local, this); + return 0; fail: - local->transaction.unwind (frame, this); - AFR_STACK_DESTROY (frame); - return 0; + local->transaction.unwind(frame, this); + AFR_STACK_DESTROY(frame); + return 0; } int -afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int ret = -1; - int event_generation = 0; - - local = frame->local; - priv = this->private; - local->transaction.frame = frame; - - local->transaction.type = type; - - if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) { - ret = -afr_quorum_errno(priv); - goto out; - } - - if (!afr_is_consistent_io_possible (local, priv, &ret)) { - ret = -ret; /*op_errno to ret conversion*/ - goto out; - } - - ret = afr_transaction_local_init (local, this); - if (ret < 0) - goto out; - - - if (type != AFR_METADATA_TRANSACTION) { - goto txn_start; - } - - ret = afr_inode_get_readable (frame, local->inode, this, - local->readable, &event_generation, type); - if (ret < 0 || afr_is_inode_refresh_reqd (local->inode, this, - priv->event_generation, - event_generation)) { - afr_inode_refresh (frame, this, local->inode, local->loc.gfid, - afr_write_txn_refresh_done); - ret = 0; - goto out; - } +afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int ret = -1; + int event_generation = 0; + + local = frame->local; + priv = this->private; + local->transaction.frame = frame; + + local->transaction.type = type; + + if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) { + ret = -afr_quorum_errno(priv); + goto out; + } + + if (!afr_is_consistent_io_possible(local, priv, &ret)) { + ret = -ret; /*op_errno to ret conversion*/ + goto out; + } + + ret = afr_transaction_local_init(local, this); + if (ret < 0) + goto out; + + if (type != AFR_METADATA_TRANSACTION) { + 
goto txn_start; + } + + ret = afr_inode_get_readable(frame, local->inode, this, local->readable, + &event_generation, type); + if (ret < 0 || + afr_is_inode_refresh_reqd(local->inode, this, priv->event_generation, + event_generation)) { + afr_inode_refresh(frame, this, local->inode, local->loc.gfid, + afr_write_txn_refresh_done); + ret = 0; + goto out; + } txn_start: - ret = 0; - afr_transaction_start (local, this); + ret = 0; + afr_transaction_start(local, this); out: - return ret; + return ret; } diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index bde9a97a4e7..568293cdf2c 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -21,1196 +21,1135 @@ struct volume_options options[]; static char *afr_favorite_child_policies[AFR_FAV_CHILD_POLICY_MAX + 1] = { - [AFR_FAV_CHILD_NONE] = "none", - [AFR_FAV_CHILD_BY_SIZE] = "size", - [AFR_FAV_CHILD_BY_CTIME] = "ctime", - [AFR_FAV_CHILD_BY_MTIME] = "mtime", - [AFR_FAV_CHILD_BY_MAJORITY] = "majority", - [AFR_FAV_CHILD_POLICY_MAX] = NULL, + [AFR_FAV_CHILD_NONE] = "none", + [AFR_FAV_CHILD_BY_SIZE] = "size", + [AFR_FAV_CHILD_BY_CTIME] = "ctime", + [AFR_FAV_CHILD_BY_MTIME] = "mtime", + [AFR_FAV_CHILD_BY_MAJORITY] = "majority", + [AFR_FAV_CHILD_POLICY_MAX] = NULL, }; int32_t -notify (xlator_t *this, int32_t event, - void *data, ...) +notify(xlator_t *this, int32_t event, void *data, ...) { - int ret = -1; - va_list ap; - void *data2 = NULL; + int ret = -1; + va_list ap; + void *data2 = NULL; - va_start (ap, data); - data2 = va_arg (ap, dict_t*); - va_end (ap); - ret = afr_notify (this, event, data, data2); + va_start(ap, data); + data2 = va_arg(ap, dict_t *); + va_end(ap); + ret = afr_notify(this, event, data, data2); - return ret; + return ret; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; - - if (!this) - return ret; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { - return ret; - } + ret = xlator_mem_acct_init(this, gf_afr_mt_end + 1); + if (ret != 0) { return ret; -} + } + return ret; +} int -xlator_subvolume_index (xlator_t *this, xlator_t *subvol) +xlator_subvolume_index(xlator_t *this, xlator_t *subvol) { - int index = -1; - int i = 0; - xlator_list_t *list = NULL; - - list = this->children; - - while (list) { - if (subvol == list->xlator || - strcmp (subvol->name, list->xlator->name) == 0) { - index = i; - break; - } - list = list->next; - i++; + int index = -1; + int i = 0; + xlator_list_t *list = NULL; + + list = this->children; + + while (list) { + if (subvol == list->xlator || + strcmp(subvol->name, list->xlator->name) == 0) { + index = i; + break; } + list = list->next; + i++; + } - return index; + return index; } static void -fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype, - dict_t *options) +fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype, + dict_t *options) { - - if (dict_get (options, "quorum-type") == NULL) { - /* If user doesn't configure anything enable auto-quorum if the - * replica has more than two subvolumes */ - if (priv->child_count > 2) - qtype = "auto"; - } - - if (priv->quorum_count && strcmp (qtype, "fixed")) { - gf_msg (this->name,GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE, - "quorum-type %s overriding quorum-count %u", - qtype, priv->quorum_count); - } - - if (!strcmp (qtype, "none")) { - priv->quorum_count = 0; - } else if (!strcmp (qtype, "auto")) { - priv->quorum_count = AFR_QUORUM_AUTO; - } - + if 
(dict_get(options, "quorum-type") == NULL) { + /* If user doesn't configure anything enable auto-quorum if the + * replica has more than two subvolumes */ + if (priv->child_count > 2) + qtype = "auto"; + } + + if (priv->quorum_count && strcmp(qtype, "fixed")) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE, + "quorum-type %s overriding quorum-count %u", qtype, + priv->quorum_count); + } + + if (!strcmp(qtype, "none")) { + priv->quorum_count = 0; + } else if (!strcmp(qtype, "auto")) { + priv->quorum_count = AFR_QUORUM_AUTO; + } } int -afr_set_favorite_child_policy (afr_private_t *priv, char *policy) +afr_set_favorite_child_policy(afr_private_t *priv, char *policy) { - int index = -1; + int index = -1; - index = gf_get_index_by_elem (afr_favorite_child_policies, policy); - if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX) - return -1; + index = gf_get_index_by_elem(afr_favorite_child_policies, policy); + if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX) + return -1; - priv->fav_child_policy = index; + priv->fav_child_policy = index; - return 0; + return 0; } int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - afr_private_t *priv = NULL; - xlator_t *read_subvol = NULL; - int read_subvol_index = -1; - int ret = -1; - int index = -1; - char *qtype = NULL; - char *fav_child_policy = NULL; - gf_boolean_t consistent_io = _gf_false; - gf_boolean_t choose_local_old = _gf_false; - - priv = this->private; + afr_private_t *priv = NULL; + xlator_t *read_subvol = NULL; + int read_subvol_index = -1; + int ret = -1; + int index = -1; + char *qtype = NULL; + char *fav_child_policy = NULL; + gf_boolean_t consistent_io = _gf_false; + gf_boolean_t choose_local_old = _gf_false; - GF_OPTION_RECONF ("afr-dirty-xattr", - priv->afr_dirty, options, str, - out); + priv = this->private; - GF_OPTION_RECONF ("metadata-splitbrain-forced-heal", - priv->metadata_splitbrain_forced_heal, options, bool, - out); + GF_OPTION_RECONF("afr-dirty-xattr", priv->afr_dirty, options, str, out); - GF_OPTION_RECONF ("background-self-heal-count", - priv->background_self_heal_count, options, uint32, - out); + GF_OPTION_RECONF("metadata-splitbrain-forced-heal", + priv->metadata_splitbrain_forced_heal, options, bool, out); - GF_OPTION_RECONF ("heal-wait-queue-length", - priv->heal_wait_qlen, options, uint32, out); + GF_OPTION_RECONF("background-self-heal-count", + priv->background_self_heal_count, options, uint32, out); + GF_OPTION_RECONF("heal-wait-queue-length", priv->heal_wait_qlen, options, + uint32, out); - GF_OPTION_RECONF ("metadata-self-heal", - priv->metadata_self_heal, options, bool, out); + GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options, + bool, out); - GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str, - out); + GF_OPTION_RECONF("data-self-heal", priv->data_self_heal, options, str, out); - GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options, - bool, out); + GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool, + out); - GF_OPTION_RECONF ("data-self-heal-window-size", - priv->data_self_heal_window_size, options, - uint32, out); + GF_OPTION_RECONF("data-self-heal-window-size", + priv->data_self_heal_window_size, options, uint32, out); - GF_OPTION_RECONF ("data-self-heal-algorithm", - priv->data_self_heal_algorithm, options, str, out); + GF_OPTION_RECONF("data-self-heal-algorithm", priv->data_self_heal_algorithm, + options, str, out); - GF_OPTION_RECONF ("halo-enabled", - 
priv->halo_enabled, options, bool, - out); + GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out); - GF_OPTION_RECONF ("halo-shd-max-latency", - priv->shd.halo_max_latency_msec, options, uint32, - out); + GF_OPTION_RECONF("halo-shd-max-latency", priv->shd.halo_max_latency_msec, + options, uint32, out); - GF_OPTION_RECONF ("halo-nfsd-max-latency", - priv->nfsd.halo_max_latency_msec, options, uint32, - out); + GF_OPTION_RECONF("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec, + options, uint32, out); - GF_OPTION_RECONF ("halo-max-latency", priv->halo_max_latency_msec, - options, uint32, out); + GF_OPTION_RECONF("halo-max-latency", priv->halo_max_latency_msec, options, + uint32, out); - GF_OPTION_RECONF ("halo-max-replicas", priv->halo_max_replicas, options, - uint32, out); + GF_OPTION_RECONF("halo-max-replicas", priv->halo_max_replicas, options, + uint32, out); - GF_OPTION_RECONF ("halo-min-replicas", priv->halo_min_replicas, options, - uint32, out); + GF_OPTION_RECONF("halo-min-replicas", priv->halo_min_replicas, options, + uint32, out); - GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out); + GF_OPTION_RECONF("read-subvolume", read_subvol, options, xlator, out); - choose_local_old = priv->choose_local; - GF_OPTION_RECONF ("choose-local", priv->choose_local, options, bool, - out); + choose_local_old = priv->choose_local; + GF_OPTION_RECONF("choose-local", priv->choose_local, options, bool, out); - if (choose_local_old != priv->choose_local) { - priv->read_child = -1; - if (choose_local_old == _gf_false) - priv->did_discovery = _gf_false; - } - - GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode, - options, uint32, out); - - if (read_subvol) { - index = xlator_subvolume_index (this, read_subvol); - if (index == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INVALID_SUBVOL, "%s not a subvolume", - read_subvol->name); - goto out; - } - priv->read_child = index; + if (choose_local_old != priv->choose_local) { + priv->read_child = -1; + if (choose_local_old == _gf_false) + priv->did_discovery = _gf_false; + } + + GF_OPTION_RECONF("read-hash-mode", priv->hash_mode, options, uint32, out); + + if (read_subvol) { + index = xlator_subvolume_index(this, read_subvol); + if (index == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL, + "%s not a subvolume", read_subvol->name); + goto out; } - - GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out); - - if (read_subvol_index >-1) { - index=read_subvol_index; - if (index >= priv->child_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INVALID_SUBVOL, - "%d not a subvolume-index", index); - goto out; - } - priv->read_child = index; + priv->read_child = index; + } + + GF_OPTION_RECONF("read-subvolume-index", read_subvol_index, options, int32, + out); + + if (read_subvol_index > -1) { + index = read_subvol_index; + if (index >= priv->child_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL, + "%d not a subvolume-index", index); + goto out; } + priv->read_child = index; + } - GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool, - out); - GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str, - out); - GF_OPTION_RECONF ("full-lock", priv->full_lock, options, bool, out); - GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options, - bool, out); + GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out); + GF_OPTION_RECONF("locking-scheme", priv->locking_scheme, options, 
str, out); + GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out); + GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool, + out); - GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out); - GF_OPTION_RECONF ("quorum-type", qtype, options, str, out); - GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options, - uint32, out); - fix_quorum_options (this, priv, qtype, options); - if (priv->quorum_count && !afr_has_quorum (priv->child_up, this)) - gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL, - "Client-quorum is not met"); + GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out); + GF_OPTION_RECONF("quorum-type", qtype, options, str, out); + GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out); + fix_quorum_options(this, priv, qtype, options); + if (priv->quorum_count && !afr_has_quorum(priv->child_up, this)) + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL, + "Client-quorum is not met"); + GF_OPTION_RECONF("post-op-delay-secs", priv->post_op_delay_secs, options, + uint32, out); - GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options, - uint32, out); + GF_OPTION_RECONF(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, options, + size_uint64, out); + /* Reset this so we re-discover in case the topology changed. */ + GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options, + bool, out); - GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, - options, size_uint64, out); - /* Reset this so we re-discover in case the topology changed. */ - GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options, - bool, out); + GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out); - GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, - bool, out); + GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool, + out); - GF_OPTION_RECONF ("iam-self-heal-daemon", priv->shd.iamshd, options, - bool, out); + GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out); - GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options, - int32, out); + GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options, + bool, out); - GF_OPTION_RECONF ("consistent-metadata", priv->consistent_metadata, - options, bool, out); + GF_OPTION_RECONF("shd-max-threads", priv->shd.max_threads, options, uint32, + out); - GF_OPTION_RECONF ("shd-max-threads", priv->shd.max_threads, - options, uint32, out); + GF_OPTION_RECONF("shd-wait-qlength", priv->shd.wait_qlength, options, + uint32, out); - GF_OPTION_RECONF ("shd-wait-qlength", priv->shd.wait_qlength, - options, uint32, out); - - GF_OPTION_RECONF ("favorite-child-policy", fav_child_policy, options, - str, out); - if (afr_set_favorite_child_policy (priv, fav_child_policy) == -1) - goto out; + GF_OPTION_RECONF("favorite-child-policy", fav_child_policy, options, str, + out); + if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1) + goto out; - priv->did_discovery = _gf_false; + priv->did_discovery = _gf_false; - GF_OPTION_RECONF ("consistent-io", consistent_io, options, bool, out); - if (priv->quorum_count != 0) - consistent_io = _gf_false; - priv->consistent_io = consistent_io; + GF_OPTION_RECONF("consistent-io", consistent_io, options, bool, out); + if (priv->quorum_count != 0) + consistent_io = _gf_false; + priv->consistent_io = consistent_io; - ret = 0; + ret = 0; out: - return ret; - + return ret; } static int 
-afr_pending_xattrs_init (afr_private_t *priv, xlator_t *this) +afr_pending_xattrs_init(afr_private_t *priv, xlator_t *this) { - int ret = -1; - int i = 0; - char *ptr = NULL; - char *ptr1 = NULL; - char *xattrs_list = NULL; - xlator_list_t *trav = NULL; - int child_count = -1; - - trav = this->children; - child_count = priv->child_count; - if (priv->thin_arbiter_count) { - /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the - * name of the thin arbiter file for persistence across add/ - * removal of DHT subvols.*/ - child_count++; - } + int ret = -1; + int i = 0; + char *ptr = NULL; + char *ptr1 = NULL; + char *xattrs_list = NULL; + xlator_list_t *trav = NULL; + int child_count = -1; + + trav = this->children; + child_count = priv->child_count; + if (priv->thin_arbiter_count) { + /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the + * name of the thin arbiter file for persistence across add/ + * removal of DHT subvols.*/ + child_count++; + } + + GF_OPTION_INIT("afr-pending-xattr", xattrs_list, str, out); + priv->pending_key = GF_CALLOC(sizeof(*priv->pending_key), child_count, + gf_afr_mt_char); + if (!priv->pending_key) { + ret = -ENOMEM; + goto out; + } + if (!xattrs_list) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG, + "Unable to fetch afr-pending-xattr option from volfile." + " Falling back to using client translator names. "); - GF_OPTION_INIT ("afr-pending-xattr", xattrs_list, str, out); - priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key), - child_count, gf_afr_mt_char); - if (!priv->pending_key) { - ret = -ENOMEM; - goto out; - } - if (!xattrs_list) { - gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG, - "Unable to fetch afr-pending-xattr option from volfile." - " Falling back to using client translator names. 
"); - - while (i < child_count) { - ret = gf_asprintf (&priv->pending_key[i], "%s.%s", - AFR_XATTR_PREFIX, - trav->xlator->name); - if (ret == -1) { - ret = -ENOMEM; - goto out; - } - trav = trav->next; - i++; - } - ret = 0; - goto out; - } - - ptr = ptr1 = gf_strdup (xattrs_list); - if (!ptr) { + while (i < child_count) { + ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX, + trav->xlator->name); + if (ret == -1) { ret = -ENOMEM; goto out; - } - for (i = 0, ptr = strtok (ptr, ","); ptr; ptr = strtok (NULL, ",")) { - ret = gf_asprintf (&priv->pending_key[i], "%s.%s", - AFR_XATTR_PREFIX, ptr); - if (ret == -1) { - ret = -ENOMEM; - goto out; - } - i++; + } + trav = trav->next; + i++; } ret = 0; + goto out; + } + + ptr = ptr1 = gf_strdup(xattrs_list); + if (!ptr) { + ret = -ENOMEM; + goto out; + } + for (i = 0, ptr = strtok(ptr, ","); ptr; ptr = strtok(NULL, ",")) { + ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX, + ptr); + if (ret == -1) { + ret = -ENOMEM; + goto out; + } + i++; + } + ret = 0; out: - GF_FREE (ptr1); - return ret; - + GF_FREE(ptr1); + return ret; } int32_t -init (xlator_t *this) +init(xlator_t *this) { - afr_private_t *priv = NULL; - int child_count = 0; - xlator_list_t *trav = NULL; - int i = 0; - int ret = -1; - GF_UNUSED int op_errno = 0; - xlator_t *read_subvol = NULL; - int read_subvol_index = -1; - char *qtype = NULL; - char *fav_child_policy = NULL; - char *thin_arbiter = NULL; - - if (!this->children) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_CHILD_MISCONFIGURED, - "replicate translator needs more than one " - "subvolume defined."); - return -1; - } - - if (!this->parents) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_VOL_MISCONFIGURED, "Volume is dangling."); - } - - this->private = GF_CALLOC (1, sizeof (afr_private_t), - gf_afr_mt_afr_private_t); - if (!this->private) - goto out; - - priv = this->private; - LOCK_INIT (&priv->lock); - - child_count = xlator_subvolume_count (this); - - priv->child_count = child_count; - - priv->read_child = -1; - - GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out); - GF_OPTION_INIT ("thin-arbiter", thin_arbiter, str, out); - if (thin_arbiter && strlen(thin_arbiter) > 0) { - priv->thin_arbiter_count = 1; - priv->child_count--; - priv->ta_bad_child_index = AFR_CHILD_UNKNOWN; - priv->ta_notify_dom_lock_offset = 0; + afr_private_t *priv = NULL; + int child_count = 0; + xlator_list_t *trav = NULL; + int i = 0; + int ret = -1; + GF_UNUSED int op_errno = 0; + xlator_t *read_subvol = NULL; + int read_subvol_index = -1; + char *qtype = NULL; + char *fav_child_policy = NULL; + char *thin_arbiter = NULL; + + if (!this->children) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED, + "replicate translator needs more than one " + "subvolume defined."); + return -1; + } + + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_VOL_MISCONFIGURED, + "Volume is dangling."); + } + + this->private = GF_CALLOC(1, sizeof(afr_private_t), + gf_afr_mt_afr_private_t); + if (!this->private) + goto out; + + priv = this->private; + LOCK_INIT(&priv->lock); + + child_count = xlator_subvolume_count(this); + + priv->child_count = child_count; + + priv->read_child = -1; + + GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out); + GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out); + if (thin_arbiter && strlen(thin_arbiter) > 0) { + priv->thin_arbiter_count = 1; + priv->child_count--; + priv->ta_bad_child_index = AFR_CHILD_UNKNOWN; + 
priv->ta_notify_dom_lock_offset = 0; + } + INIT_LIST_HEAD(&priv->healing); + INIT_LIST_HEAD(&priv->heal_waiting); + + priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT; + + GF_OPTION_INIT("afr-dirty-xattr", priv->afr_dirty, str, out); + + GF_OPTION_INIT("metadata-splitbrain-forced-heal", + priv->metadata_splitbrain_forced_heal, bool, out); + + GF_OPTION_INIT("read-subvolume", read_subvol, xlator, out); + if (read_subvol) { + priv->read_child = xlator_subvolume_index(this, read_subvol); + if (priv->read_child == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL, + "%s not a subvolume", read_subvol->name); + goto out; } - INIT_LIST_HEAD (&priv->healing); - INIT_LIST_HEAD (&priv->heal_waiting); - - priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT; - - GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out); - - GF_OPTION_INIT ("metadata-splitbrain-forced-heal", - priv->metadata_splitbrain_forced_heal, bool, out); - - GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out); - if (read_subvol) { - priv->read_child = xlator_subvolume_index (this, read_subvol); - if (priv->read_child == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INVALID_SUBVOL, "%s not a subvolume", - read_subvol->name); - goto out; - } - } - GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out); - if (read_subvol_index > -1) { - if (read_subvol_index >= priv->child_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INVALID_SUBVOL, - "%d not a subvolume-index", read_subvol_index); - goto out; - } - priv->read_child = read_subvol_index; + } + GF_OPTION_INIT("read-subvolume-index", read_subvol_index, int32, out); + if (read_subvol_index > -1) { + if (read_subvol_index >= priv->child_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL, + "%d not a subvolume-index", read_subvol_index); + goto out; } - GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out); - - priv->pending_reads = GF_CALLOC (sizeof(*priv->pending_reads), - priv->child_count, gf_afr_mt_atomic_t); - - GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out); + priv->read_child = read_subvol_index; + } + GF_OPTION_INIT("choose-local", priv->choose_local, bool, out); - priv->favorite_child = -1; + priv->pending_reads = GF_CALLOC(sizeof(*priv->pending_reads), + priv->child_count, gf_afr_mt_atomic_t); - GF_OPTION_INIT ("favorite-child-policy", fav_child_policy, str, out); - if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1) - goto out; - - GF_OPTION_INIT ("shd-max-threads", priv->shd.max_threads, - uint32, out); - - GF_OPTION_INIT ("shd-wait-qlength", priv->shd.wait_qlength, - uint32, out); + GF_OPTION_INIT("read-hash-mode", priv->hash_mode, uint32, out); - GF_OPTION_INIT ("background-self-heal-count", - priv->background_self_heal_count, uint32, out); + priv->favorite_child = -1; - GF_OPTION_INIT ("heal-wait-queue-length", - priv->heal_wait_qlen, uint32, out); + GF_OPTION_INIT("favorite-child-policy", fav_child_policy, str, out); + if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1) + goto out; - GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out); + GF_OPTION_INIT("shd-max-threads", priv->shd.max_threads, uint32, out); - GF_OPTION_INIT ("data-self-heal-algorithm", - priv->data_self_heal_algorithm, str, out); + GF_OPTION_INIT("shd-wait-qlength", priv->shd.wait_qlength, uint32, out); - GF_OPTION_INIT ("data-self-heal-window-size", - priv->data_self_heal_window_size, uint32, out); + 
GF_OPTION_INIT("background-self-heal-count", + priv->background_self_heal_count, uint32, out); - GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool, - out); + GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out); - GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out); + GF_OPTION_INIT("data-self-heal", priv->data_self_heal, str, out); - GF_OPTION_INIT ("halo-shd-max-latency", priv->shd.halo_max_latency_msec, - uint32, out); + GF_OPTION_INIT("data-self-heal-algorithm", priv->data_self_heal_algorithm, + str, out); - GF_OPTION_INIT ("halo-max-latency", priv->halo_max_latency_msec, - uint32, out); - GF_OPTION_INIT ("halo-max-replicas", priv->halo_max_replicas, uint32, - out); - GF_OPTION_INIT ("halo-min-replicas", priv->halo_min_replicas, uint32, - out); + GF_OPTION_INIT("data-self-heal-window-size", + priv->data_self_heal_window_size, uint32, out); - GF_OPTION_INIT ("halo-enabled", - priv->halo_enabled, bool, out); + GF_OPTION_INIT("metadata-self-heal", priv->metadata_self_heal, bool, out); - GF_OPTION_INIT ("halo-nfsd-max-latency", - priv->nfsd.halo_max_latency_msec, uint32, out); + GF_OPTION_INIT("entry-self-heal", priv->entry_self_heal, bool, out); - GF_OPTION_INIT ("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out); + GF_OPTION_INIT("halo-shd-max-latency", priv->shd.halo_max_latency_msec, + uint32, out); - GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log, - bool, out); + GF_OPTION_INIT("halo-max-latency", priv->halo_max_latency_msec, uint32, + out); + GF_OPTION_INIT("halo-max-replicas", priv->halo_max_replicas, uint32, out); + GF_OPTION_INIT("halo-min-replicas", priv->halo_min_replicas, uint32, out); - GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out); - GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out); - GF_OPTION_INIT ("full-lock", priv->full_lock, bool, out); - GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out); + GF_OPTION_INIT("halo-enabled", priv->halo_enabled, bool, out); - GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out); - GF_OPTION_INIT ("quorum-type", qtype, str, out); - GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out); - GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64, - out); - fix_quorum_options (this, priv, qtype, this->options); + GF_OPTION_INIT("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec, + uint32, out); - GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out); - GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool, - out); + GF_OPTION_INIT("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out); - GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out); + GF_OPTION_INIT("optimistic-change-log", priv->optimistic_change_log, bool, + out); - GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out); - GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out); + GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out); + GF_OPTION_INIT("locking-scheme", priv->locking_scheme, str, out); + GF_OPTION_INIT("full-lock", priv->full_lock, bool, out); + GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out); - GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool, - out); - GF_OPTION_INIT ("consistent-io", priv->consistent_io, bool, out); + GF_OPTION_INIT("eager-lock", priv->eager_lock, bool, out); + GF_OPTION_INIT("quorum-type", qtype, str, out); + GF_OPTION_INIT("quorum-count", priv->quorum_count, 
uint32, out); + GF_OPTION_INIT(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64, + out); + fix_quorum_options(this, priv, qtype, this->options); - if (priv->quorum_count != 0) - priv->consistent_io = _gf_false; + GF_OPTION_INIT("post-op-delay-secs", priv->post_op_delay_secs, uint32, out); + GF_OPTION_INIT("ensure-durability", priv->ensure_durability, bool, out); - priv->wait_count = 1; + GF_OPTION_INIT("self-heal-daemon", priv->shd.enabled, bool, out); - priv->local = GF_CALLOC (sizeof (unsigned char), child_count, - gf_afr_mt_char); - if (!priv->local) { - ret = -ENOMEM; - goto out; - } + GF_OPTION_INIT("iam-self-heal-daemon", priv->shd.iamshd, bool, out); + GF_OPTION_INIT("heal-timeout", priv->shd.timeout, int32, out); - priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, - gf_afr_mt_char); + GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); + GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); - priv->child_latency = GF_MALLOC (sizeof (*priv->child_latency) - * child_count, - gf_afr_mt_child_latency_t); + if (priv->quorum_count != 0) + priv->consistent_io = _gf_false; - if (!priv->child_up || !priv->child_latency) { - ret = -ENOMEM; - goto out; - } - /*Initialize to -ve ping timeout so that they are not considered - * in child-up events until ping-event comes*/ - for (i = 0; i < child_count; i++) - priv->child_latency[i] = -1; - - priv->children = GF_CALLOC (sizeof (xlator_t *), child_count, - gf_afr_mt_xlator_t); - if (!priv->children) { - ret = -ENOMEM; - goto out; - } + priv->wait_count = 1; - ret = afr_pending_xattrs_init (priv, this); - if (ret) - goto out; + priv->local = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); + if (!priv->local) { + ret = -ENOMEM; + goto out; + } - trav = this->children; - i = 0; - while (i < child_count) { - priv->children[i] = trav->xlator; - trav = trav->next; - i++; - } + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); - ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT, - this->name); - if (-1 == ret) { - ret = -ENOMEM; - goto out; - } + priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count, + gf_afr_mt_child_latency_t); - priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event), - gf_afr_mt_int32_t); - if (!priv->last_event) { - ret = -ENOMEM; - goto out; - } + if (!priv->child_up || !priv->child_latency) { + ret = -ENOMEM; + goto out; + } + /*Initialize to -ve ping timeout so that they are not considered + * in child-up events until ping-event comes*/ + for (i = 0; i < child_count; i++) + priv->child_latency[i] = -1; - ret = afr_selfheal_daemon_init (this); - if (ret) { - ret = -ENOMEM; - goto out; - } + priv->children = GF_CALLOC(sizeof(xlator_t *), child_count, + gf_afr_mt_xlator_t); + if (!priv->children) { + ret = -ENOMEM; + goto out; + } - /* keep more local here as we may need them for self-heal etc */ - this->local_pool = mem_pool_new (afr_local_t, 512); - if (!this->local_pool) { - ret = -1; - goto out; - } + ret = afr_pending_xattrs_init(priv, this); + if (ret) + goto out; - priv->root_inode = NULL; + trav = this->children; + i = 0; + while (i < child_count) { + priv->children[i] = trav->xlator; + trav = trav->next; + i++; + } - ret = 0; + ret = gf_asprintf(&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT, this->name); + if (-1 == ret) { + ret = -ENOMEM; + goto out; + } + + priv->last_event = GF_CALLOC(child_count, sizeof(*priv->last_event), + gf_afr_mt_int32_t); + if (!priv->last_event) { + 
ret = -ENOMEM; + goto out; + } + + ret = afr_selfheal_daemon_init(this); + if (ret) { + ret = -ENOMEM; + goto out; + } + + /* keep more local here as we may need them for self-heal etc */ + this->local_pool = mem_pool_new(afr_local_t, 512); + if (!this->local_pool) { + ret = -1; + goto out; + } + + priv->root_inode = NULL; + + ret = 0; out: - return ret; + return ret; } - int -fini (xlator_t *this) +fini(xlator_t *this) { - afr_private_t *priv = NULL; - - priv = this->private; - LOCK (&priv->lock); - if (priv->timer != NULL) { - gf_timer_call_cancel(this->ctx, priv->timer); - priv->timer = NULL; - } - UNLOCK (&priv->lock); - this->private = NULL; - afr_priv_destroy (priv); - //if (this->itable);//I don't see any destroy func - - return 0; + afr_private_t *priv = NULL; + + priv = this->private; + LOCK(&priv->lock); + if (priv->timer != NULL) { + gf_timer_call_cancel(this->ctx, priv->timer); + priv->timer = NULL; + } + UNLOCK(&priv->lock); + this->private = NULL; + afr_priv_destroy(priv); + // if (this->itable);//I don't see any destroy func + + return 0; } - struct xlator_fops fops = { - .lookup = afr_lookup, - .lk = afr_lk, - .flush = afr_flush, - .statfs = afr_statfs, - .fsyncdir = afr_fsyncdir, - .inodelk = afr_inodelk, - .finodelk = afr_finodelk, - .entrylk = afr_entrylk, - .fentrylk = afr_fentrylk, - .ipc = afr_ipc, - .lease = afr_lease, - - /* inode read */ - .access = afr_access, - .stat = afr_stat, - .fstat = afr_fstat, - .readlink = afr_readlink, - .getxattr = afr_getxattr, - .fgetxattr = afr_fgetxattr, - .readv = afr_readv, - - /* inode write */ - .writev = afr_writev, - .truncate = afr_truncate, - .ftruncate = afr_ftruncate, - .setxattr = afr_setxattr, - .fsetxattr = afr_fsetxattr, - .setattr = afr_setattr, - .fsetattr = afr_fsetattr, - .removexattr = afr_removexattr, - .fremovexattr = afr_fremovexattr, - .fallocate = afr_fallocate, - .discard = afr_discard, - .zerofill = afr_zerofill, - .xattrop = afr_xattrop, - .fxattrop = afr_fxattrop, - .fsync = afr_fsync, - - /*inode open*/ - .opendir = afr_opendir, - .open = afr_open, - - /* dir read */ - .readdir = afr_readdir, - .readdirp = afr_readdirp, - - /* dir write */ - .create = afr_create, - .mknod = afr_mknod, - .mkdir = afr_mkdir, - .unlink = afr_unlink, - .rmdir = afr_rmdir, - .link = afr_link, - .symlink = afr_symlink, - .rename = afr_rename, + .lookup = afr_lookup, + .lk = afr_lk, + .flush = afr_flush, + .statfs = afr_statfs, + .fsyncdir = afr_fsyncdir, + .inodelk = afr_inodelk, + .finodelk = afr_finodelk, + .entrylk = afr_entrylk, + .fentrylk = afr_fentrylk, + .ipc = afr_ipc, + .lease = afr_lease, + + /* inode read */ + .access = afr_access, + .stat = afr_stat, + .fstat = afr_fstat, + .readlink = afr_readlink, + .getxattr = afr_getxattr, + .fgetxattr = afr_fgetxattr, + .readv = afr_readv, + + /* inode write */ + .writev = afr_writev, + .truncate = afr_truncate, + .ftruncate = afr_ftruncate, + .setxattr = afr_setxattr, + .fsetxattr = afr_fsetxattr, + .setattr = afr_setattr, + .fsetattr = afr_fsetattr, + .removexattr = afr_removexattr, + .fremovexattr = afr_fremovexattr, + .fallocate = afr_fallocate, + .discard = afr_discard, + .zerofill = afr_zerofill, + .xattrop = afr_xattrop, + .fxattrop = afr_fxattrop, + .fsync = afr_fsync, + + /*inode open*/ + .opendir = afr_opendir, + .open = afr_open, + + /* dir read */ + .readdir = afr_readdir, + .readdirp = afr_readdirp, + + /* dir write */ + .create = afr_create, + .mknod = afr_mknod, + .mkdir = afr_mkdir, + .unlink = afr_unlink, + .rmdir = afr_rmdir, + .link = afr_link, + .symlink = 
afr_symlink, + .rename = afr_rename, }; - struct xlator_dumpops dumpops = { - .priv = afr_priv_dump, + .priv = afr_priv_dump, }; - struct xlator_cbks cbks = { - .release = afr_release, - .releasedir = afr_releasedir, - .forget = afr_forget, + .release = afr_release, + .releasedir = afr_releasedir, + .forget = afr_forget, }; - struct volume_options options[] = { - { .key = {"read-subvolume" }, - .type = GF_OPTION_TYPE_XLATOR, - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "inode-read fops happen only on one of the bricks in " - "replicate. Afr will prefer the one specified using " - "this option if it is not stale. Option value must be " - "one of the xlator names of the children. " - "Ex: -client-0 till " - "-client-" - }, - { .key = {"read-subvolume-index" }, - .type = GF_OPTION_TYPE_INT, - .default_value = "-1", - .op_version = {2}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "inode-read fops happen only on one of the bricks in " - "replicate. AFR will prefer the one specified using " - "this option if it is not stale. allowed options" - " include -1 till replica-count - 1" - }, - { .key = {"read-hash-mode" }, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = 3, - .default_value = "1", - .op_version = {2}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "inode-read fops happen only on one of the bricks in " - "replicate. AFR will prefer the one computed using " - "the method specified using this option.\n" - "0 = first readable child of AFR, starting from 1st child.\n" - "1 = hash by GFID of file (all clients use " - "same subvolume).\n" - "2 = hash by GFID of file and client PID.\n" - "3 = brick having the least outstanding read requests." - }, - { .key = {"choose-local" }, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "true", - .op_version = {2}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Choose a local subvolume (i.e. Brick) to read from" - " if read-subvolume is not explicitly set.", - }, - { .key = {"background-self-heal-count"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = 256, - .default_value = "8", - .validate = GF_OPT_VALIDATE_MIN, - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This specifies the number of per client self-heal " - "jobs that can perform parallel heals in the " - "background." - }, - { .key = {"halo-shd-max-latency"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 99999, - .default_value = "99999", - .op_version = {GD_OP_VERSION_3_11_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate", "halo"}, - .description = "Maximum latency for shd halo replication in msec." - }, - { .key = {"halo-enabled"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "False", - .op_version = {GD_OP_VERSION_3_11_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate", "halo"}, - .description = "Enable Halo (geo) replication mode." 
- }, - { .key = {"halo-nfsd-max-latency"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 99999, - .default_value = "5", - .op_version = {GD_OP_VERSION_3_11_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate", "halo"}, - .description = "Maximum latency for nfsd halo replication in msec." - }, - { .key = {"halo-max-latency"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = AFR_HALO_MAX_LATENCY, - .default_value = "5", - .op_version = {GD_OP_VERSION_3_11_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate", "halo"}, - .description = "Maximum latency for halo replication in msec." - }, - { .key = {"halo-max-replicas"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 99999, - .default_value = "99999", - .op_version = {GD_OP_VERSION_3_11_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate", "halo"}, - .description = "The maximum number of halo replicas; replicas" - " beyond this value will be written asynchronously" - "via the SHD." - }, - { .key = {"halo-min-replicas"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 99999, - .default_value = "2", - .op_version = {GD_OP_VERSION_3_11_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate", "halo"}, - .description = "The minimmum number of halo replicas, before adding " - "out of region replicas." - }, - { .key = {"heal-wait-queue-length"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/ - .default_value = "128", - .validate = GF_OPT_VALIDATE_MIN, - .op_version = {GD_OP_VERSION_3_7_10}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This specifies the number of heals that can be queued" - " for the parallel background self heal jobs." - }, - { .key = {"data-self-heal"}, - .type = GF_OPTION_TYPE_STR, - .value = {"1", "on", "yes", "true", "enable", - "0", "off", "no", "false", "disable", - "open"}, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Using this option we can enable/disable data " - "self-heal on the file. \"open\" means data " - "self-heal action will only be triggered by file " - "open operations." - }, - { .key = {"data-self-heal-algorithm"}, - .type = GF_OPTION_TYPE_STR, - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Select between \"full\", \"diff\". The " - "\"full\" algorithm copies the entire file from " - "source to sink. The \"diff\" algorithm copies to " - "sink only those blocks whose checksums don't match " - "with those of source. If no option is configured " - "the option is chosen dynamically as follows: " - "If the file does not exist on one of the sinks " - "or empty file exists or if the source file size is " - "about the same as page size the entire file will " - "be read and written i.e \"full\" algo, " - "otherwise \"diff\" algo is chosen.", - .value = { "diff", "full"} - }, - { .key = {"data-self-heal-window-size"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 1024, - .default_value = "1", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Maximum number blocks per file for which self-heal " - "process would be applied simultaneously." 
- }, - { .key = {"metadata-self-heal"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - /*.validate_fn = validate_replica*/ - .description = "Using this option we can enable/disable metadata " - "i.e. Permissions, ownerships, xattrs self-heal on " - "the file/directory." - }, - { .key = {"entry-self-heal"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - /*.validate_fn = validate_replica*/ - .description = "Using this option we can enable/disable entry " - "self-heal on the directory." - }, - { .key = {"data-change-log"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This option exists only for backward compatibility " - "and configuring it doesn't have any effect" - }, - { .key = {"metadata-change-log"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This option exists only for backward compatibility " - "and configuring it doesn't have any effect" - }, - { .key = {"entry-change-log"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This option exists only for backward compatibility " - "and configuring it doesn't have any effect" - }, - { .key = {"optimistic-change-log"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .description = "Entry/Metadata fops will not perform " - "pre fop changelog operations in afr transaction " - "if this option is enabled." - }, - { .key = {"inodelk-trace"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Enabling this option logs inode lock/unlocks" - }, - { .key = {"entrylk-trace"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Enabling this option logs entry lock/unlocks" - }, - { .key = {"pre-op-compat"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .description = "Use separate pre-op xattrop() FOP rather than " - "overloading xdata of the OP" - }, - { .key = {"eager-lock"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Enable/Disable eager lock for replica volume. " - "Lock phase of a transaction has two sub-phases. " - "First is an attempt to acquire locks in parallel by " - "broadcasting non-blocking lock requests. If lock " - "acquisition fails on any server, then the held locks " - "are unlocked and we revert to a blocking locks mode " - "sequentially on one server after another. If this " - "option is enabled the initial broadcasting lock " - "request attempts to acquire a full lock on the entire file. " - "If this fails, we revert back to the sequential " - "\"regional\" blocking locks as before. In the case " - "where such an \"eager\" lock is granted in the " - "non-blocking phase, it gives rise to an opportunity " - "for optimization. 
i.e, if the next write transaction " - "on the same FD arrives before the unlock phase of " - "the first transaction, it \"takes over\" the full " - "file lock. Similarly if yet another data transaction " - "arrives before the unlock phase of the \"optimized\" " - "transaction, that in turn \"takes over\" the lock as " - "well. The actual unlock now happens at the end of " - "the last \"optimized\" transaction." - - }, - { .key = {"self-heal-daemon"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - .tags = {"replicate"}, - /*.validate_fn = validate_replica_heal_enable_disable*/ - .description = "This option applies to only self-heal-daemon. " - "Index directory crawl and automatic healing of files " - "will not be performed if this option is turned off." - }, - { .key = {"iam-self-heal-daemon"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option differentiates if the replicate " - "translator is running as part of self-heal-daemon " - "or not." - }, - { .key = {"iam-nfs-daemon"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option differentiates if the replicate " - "translator is running as part of an NFS daemon " - "or not." - }, - { .key = {"quorum-type"}, - .type = GF_OPTION_TYPE_STR, - .value = { "none", "auto", "fixed"}, - .default_value = "none", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - /*.option = quorum-type*/ - .description = "If value is \"fixed\" only allow writes if " - "quorum-count bricks are present. If value is " - "\"auto\" only allow writes if more than half of " - "bricks, or exactly half including the first, are " - "present.", - }, - { .key = {"quorum-count"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = INT_MAX, - .default_value = 0, - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - /*.option = quorum-count*/ - /*.validate_fn = validate_quorum_count*/ - .description = "If quorum-type is \"fixed\" only allow writes if " - "this many bricks are present. Other quorum types " - "will OVERWRITE this value.", - }, - { .key = {"quorum-reads"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "no", - .op_version = {GD_OP_VERSION_3_7_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This option has been removed. 
Reads are not allowed " - "if quorum is not met.", - }, - { .key = {"node-uuid"}, - .type = GF_OPTION_TYPE_STR, - .description = "Local glusterd uuid string, used in starting " - "self-heal-daemon so that it can crawl only on " - "local index directories.", - }, - { .key = {"post-op-delay-secs"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = INT_MAX, - .default_value = "1", - .op_version = {2}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Time interval induced artificially before " - "post-operation phase of the transaction to " - "enhance overlap of adjacent write operations.", - }, - { .key = {AFR_SH_READDIR_SIZE_KEY}, - .type = GF_OPTION_TYPE_SIZET, - .description = "readdirp size for performing entry self-heal", - .min = 1024, - .max = 131072, - .op_version = {2}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - .tags = {"replicate"}, - .default_value = "1KB", - }, - { .key = {"ensure-durability"}, - .type = GF_OPTION_TYPE_BOOL, - .op_version = {3}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Afr performs fsyncs for transactions if this " - "option is on to make sure the changelogs/data is " - "written to the disk", - .default_value = "on", - }, - { .key = {"afr-dirty-xattr"}, - .type = GF_OPTION_TYPE_STR, - .default_value = AFR_DIRTY_DEFAULT, - }, - { .key = {"afr-pending-xattr"}, - .type = GF_OPTION_TYPE_STR, - .description = "Comma separated list of xattrs that are used to " - "capture information on pending heals." - }, - { .key = {"metadata-splitbrain-forced-heal"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - { .key = {"heal-timeout"}, - .type = GF_OPTION_TYPE_INT, - .min = 5, - .max = INT_MAX, - .default_value = "600", - .op_version = {2}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "time interval for checking the need to self-heal " - "in self-heal-daemon" - }, - { .key = {"consistent-metadata"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "no", - .op_version = {GD_OP_VERSION_3_7_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "If this option is enabled, readdirp will force " - "lookups on those entries read whose read child is " - "not the same as that of the parent. This will " - "guarantee that all read operations on a file serve " - "attributes from the same subvol as long as it holds " - " a good copy of the file/dir.", - }, - { .key = {"arbiter-count"}, - .type = GF_OPTION_TYPE_INT, - .description = "subset of child_count. Has to be 0 or 1." - }, - { .key = {"thin-arbiter"}, - .type = GF_OPTION_TYPE_STR, - .op_version = {GD_OP_VERSION_4_1_0}, - .flags = OPT_FLAG_SETTABLE, - .tags = {"replicate"}, - .description = "contains host:path of thin abriter brick", - }, - { .key = {"shd-max-threads"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 64, - .default_value = "1", - .op_version = {GD_OP_VERSION_3_7_12}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "Maximum number of parallel heals SHD can do per " - "local brick. This can substantially lower heal times" - ", but can also crush your bricks if you don't have " - "the storage hardware to support this." 
- }, - { .key = {"shd-wait-qlength"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 655536, - .default_value = "1024", - .op_version = {GD_OP_VERSION_3_7_12}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This option can be used to control number of heals" - " that can wait in SHD per subvolume", - }, - { .key = {"locking-scheme"}, - .type = GF_OPTION_TYPE_STR, - .value = { "full", "granular"}, - .default_value = "full", - .op_version = {GD_OP_VERSION_3_7_12}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "If this option is set to granular, self-heal will " - "stop being compatible with afr-v1, which helps afr " - "be more granular while self-healing", - }, - { .key = {"full-lock"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "yes", - .op_version = {GD_OP_VERSION_3_13_2}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - .tags = {"replicate"}, - .description = "If this option is disabled, then the IOs will take " - "range locks same as versions till 3.13.1." - }, - { .key = {"granular-entry-heal"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "no", - .op_version = {GD_OP_VERSION_3_8_0}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "If this option is enabled, self-heal will resort to " - "granular way of recording changelogs and doing entry " - "self-heal.", - }, - { .key = {"favorite-child-policy"}, - .type = GF_OPTION_TYPE_STR, - .value = {"none", "size", "ctime", "mtime", "majority"}, - .default_value = "none", - .op_version = {GD_OP_VERSION_3_7_12}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This option can be used to automatically resolve " - "split-brains using various policies without user " - "intervention. \"size\" picks the file with the " - "biggest size as the source. \"ctime\" and \"mtime\" " - "pick the file with the latest ctime and mtime " - "respectively as the source. \"majority\" picks a file" - " with identical mtime and size in more than half the " - "number of bricks in the replica.", - }, - { .key = {"consistent-io"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "no", - .description = "If this option is enabled, i/o will fail even if " - "one of the bricks is down in the replicas", - }, - { .key = {"use-compound-fops"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "no", - .op_version = {GD_OP_VERSION_3_8_4}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"replicate"}, - .description = "This option exists only for backward compatibility " - "and configuring it doesn't have any effect" - }, - { .key = {NULL} }, + {.key = {"read-subvolume"}, + .type = GF_OPTION_TYPE_XLATOR, + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "inode-read fops happen only on one of the bricks in " + "replicate. Afr will prefer the one specified using " + "this option if it is not stale. Option value must be " + "one of the xlator names of the children. " + "Ex: -client-0 till " + "-client-"}, + {.key = {"read-subvolume-index"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "-1", + .op_version = {2}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "inode-read fops happen only on one of the bricks in " + "replicate. 
AFR will prefer the one specified using " + "this option if it is not stale. allowed options" + " include -1 till replica-count - 1"}, + {.key = {"read-hash-mode"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 3, + .default_value = "1", + .op_version = {2}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = + "inode-read fops happen only on one of the bricks in " + "replicate. AFR will prefer the one computed using " + "the method specified using this option.\n" + "0 = first readable child of AFR, starting from 1st child.\n" + "1 = hash by GFID of file (all clients use " + "same subvolume).\n" + "2 = hash by GFID of file and client PID.\n" + "3 = brick having the least outstanding read requests."}, + { + .key = {"choose-local"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .op_version = {2}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "Choose a local subvolume (i.e. Brick) to read from" + " if read-subvolume is not explicitly set.", + }, + {.key = {"background-self-heal-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 256, + .default_value = "8", + .validate = GF_OPT_VALIDATE_MIN, + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This specifies the number of per client self-heal " + "jobs that can perform parallel heals in the " + "background."}, + {.key = {"halo-shd-max-latency"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 99999, + .default_value = "99999", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate", "halo"}, + .description = "Maximum latency for shd halo replication in msec."}, + {.key = {"halo-enabled"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "False", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate", "halo"}, + .description = "Enable Halo (geo) replication mode."}, + {.key = {"halo-nfsd-max-latency"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 99999, + .default_value = "5", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate", "halo"}, + .description = "Maximum latency for nfsd halo replication in msec."}, + {.key = {"halo-max-latency"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = AFR_HALO_MAX_LATENCY, + .default_value = "5", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate", "halo"}, + .description = "Maximum latency for halo replication in msec."}, + {.key = {"halo-max-replicas"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 99999, + .default_value = "99999", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate", "halo"}, + .description = "The maximum number of halo replicas; replicas" + " beyond this value will be written asynchronously" + "via the SHD."}, + {.key = {"halo-min-replicas"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 99999, + .default_value = "2", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate", "halo"}, + .description = "The minimmum number of halo replicas, before adding " + "out of region replicas."}, + {.key = 
{"heal-wait-queue-length"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/ + .default_value = "128", + .validate = GF_OPT_VALIDATE_MIN, + .op_version = {GD_OP_VERSION_3_7_10}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This specifies the number of heals that can be queued" + " for the parallel background self heal jobs."}, + {.key = {"data-self-heal"}, + .type = GF_OPTION_TYPE_STR, + .value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false", + "disable", "open"}, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "Using this option we can enable/disable data " + "self-heal on the file. \"open\" means data " + "self-heal action will only be triggered by file " + "open operations."}, + {.key = {"data-self-heal-algorithm"}, + .type = GF_OPTION_TYPE_STR, + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "Select between \"full\", \"diff\". The " + "\"full\" algorithm copies the entire file from " + "source to sink. The \"diff\" algorithm copies to " + "sink only those blocks whose checksums don't match " + "with those of source. If no option is configured " + "the option is chosen dynamically as follows: " + "If the file does not exist on one of the sinks " + "or empty file exists or if the source file size is " + "about the same as page size the entire file will " + "be read and written i.e \"full\" algo, " + "otherwise \"diff\" algo is chosen.", + .value = {"diff", "full"}}, + {.key = {"data-self-heal-window-size"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 1024, + .default_value = "1", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "Maximum number blocks per file for which self-heal " + "process would be applied simultaneously."}, + {.key = {"metadata-self-heal"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + /*.validate_fn = validate_replica*/ + .description = "Using this option we can enable/disable metadata " + "i.e. 
Permissions, ownerships, xattrs self-heal on " + "the file/directory."}, + {.key = {"entry-self-heal"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + /*.validate_fn = validate_replica*/ + .description = "Using this option we can enable/disable entry " + "self-heal on the directory."}, + {.key = {"data-change-log"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This option exists only for backward compatibility " + "and configuring it doesn't have any effect"}, + {.key = {"metadata-change-log"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This option exists only for backward compatibility " + "and configuring it doesn't have any effect"}, + {.key = {"entry-change-log"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This option exists only for backward compatibility " + "and configuring it doesn't have any effect"}, + {.key = {"optimistic-change-log"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "Entry/Metadata fops will not perform " + "pre fop changelog operations in afr transaction " + "if this option is enabled."}, + {.key = {"inodelk-trace"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enabling this option logs inode lock/unlocks"}, + {.key = {"entrylk-trace"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enabling this option logs entry lock/unlocks"}, + {.key = {"pre-op-compat"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "Use separate pre-op xattrop() FOP rather than " + "overloading xdata of the OP"}, + {.key = {"eager-lock"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = + "Enable/Disable eager lock for replica volume. " + "Lock phase of a transaction has two sub-phases. " + "First is an attempt to acquire locks in parallel by " + "broadcasting non-blocking lock requests. If lock " + "acquisition fails on any server, then the held locks " + "are unlocked and we revert to a blocking locks mode " + "sequentially on one server after another. If this " + "option is enabled the initial broadcasting lock " + "request attempts to acquire a full lock on the entire file. " + "If this fails, we revert back to the sequential " + "\"regional\" blocking locks as before. In the case " + "where such an \"eager\" lock is granted in the " + "non-blocking phase, it gives rise to an opportunity " + "for optimization. i.e, if the next write transaction " + "on the same FD arrives before the unlock phase of " + "the first transaction, it \"takes over\" the full " + "file lock. Similarly if yet another data transaction " + "arrives before the unlock phase of the \"optimized\" " + "transaction, that in turn \"takes over\" the lock as " + "well. The actual unlock now happens at the end of " + "the last \"optimized\" transaction." 
+ + }, + {.key = {"self-heal-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + /*.validate_fn = validate_replica_heal_enable_disable*/ + .description = "This option applies to only self-heal-daemon. " + "Index directory crawl and automatic healing of files " + "will not be performed if this option is turned off."}, + {.key = {"iam-self-heal-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option differentiates if the replicate " + "translator is running as part of self-heal-daemon " + "or not."}, + {.key = {"iam-nfs-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option differentiates if the replicate " + "translator is running as part of an NFS daemon " + "or not."}, + { + .key = {"quorum-type"}, + .type = GF_OPTION_TYPE_STR, + .value = {"none", "auto", "fixed"}, + .default_value = "none", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + /*.option = quorum-type*/ + .description = "If value is \"fixed\" only allow writes if " + "quorum-count bricks are present. If value is " + "\"auto\" only allow writes if more than half of " + "bricks, or exactly half including the first, are " + "present.", + }, + { + .key = {"quorum-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = INT_MAX, + .default_value = 0, + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + /*.option = quorum-count*/ + /*.validate_fn = validate_quorum_count*/ + .description = "If quorum-type is \"fixed\" only allow writes if " + "this many bricks are present. Other quorum types " + "will OVERWRITE this value.", + }, + { + .key = {"quorum-reads"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This option has been removed. 
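/* Editor's note (illustrative sketch, not part of this patch): the
 * quorum-type description above ("fixed" vs "auto") can be read as the
 * following standalone helper. Function and parameter names are
 * hypothetical; AFR's actual quorum check in afr-common.c differs in
 * detail, this only restates the documented rule. */
#include <stdbool.h>

/* "fixed": at least quorum_count bricks must be up.
 * "auto":  more than half of the bricks must be up, or exactly half
 *          provided the first brick is among them. */
static bool
quorum_met(const bool *child_up, int child_count, bool fixed, int quorum_count)
{
    int i, up = 0;

    for (i = 0; i < child_count; i++)
        if (child_up[i])
            up++;

    if (fixed)
        return up >= quorum_count;

    if (2 * up > child_count)          /* more than half */
        return true;
    return (2 * up == child_count) && child_up[0];  /* tie broken by first brick */
}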
Reads are not allowed " + "if quorum is not met.", + }, + { + .key = {"node-uuid"}, + .type = GF_OPTION_TYPE_STR, + .description = "Local glusterd uuid string, used in starting " + "self-heal-daemon so that it can crawl only on " + "local index directories.", + }, + { + .key = {"post-op-delay-secs"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "1", + .op_version = {2}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "Time interval induced artificially before " + "post-operation phase of the transaction to " + "enhance overlap of adjacent write operations.", + }, + { + .key = {AFR_SH_READDIR_SIZE_KEY}, + .type = GF_OPTION_TYPE_SIZET, + .description = "readdirp size for performing entry self-heal", + .min = 1024, + .max = 131072, + .op_version = {2}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .default_value = "1KB", + }, + { + .key = {"ensure-durability"}, + .type = GF_OPTION_TYPE_BOOL, + .op_version = {3}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "Afr performs fsyncs for transactions if this " + "option is on to make sure the changelogs/data is " + "written to the disk", + .default_value = "on", + }, + { + .key = {"afr-dirty-xattr"}, + .type = GF_OPTION_TYPE_STR, + .default_value = AFR_DIRTY_DEFAULT, + }, + {.key = {"afr-pending-xattr"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma separated list of xattrs that are used to " + "capture information on pending heals."}, + { + .key = {"metadata-splitbrain-forced-heal"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, + {.key = {"heal-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 5, + .max = INT_MAX, + .default_value = "600", + .op_version = {2}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "time interval for checking the need to self-heal " + "in self-heal-daemon"}, + { + .key = {"consistent-metadata"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "If this option is enabled, readdirp will force " + "lookups on those entries read whose read child is " + "not the same as that of the parent. This will " + "guarantee that all read operations on a file serve " + "attributes from the same subvol as long as it holds " + " a good copy of the file/dir.", + }, + {.key = {"arbiter-count"}, + .type = GF_OPTION_TYPE_INT, + .description = "subset of child_count. Has to be 0 or 1."}, + { + .key = {"thin-arbiter"}, + .type = GF_OPTION_TYPE_STR, + .op_version = {GD_OP_VERSION_4_1_0}, + .flags = OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "contains host:path of thin abriter brick", + }, + {.key = {"shd-max-threads"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 64, + .default_value = "1", + .op_version = {GD_OP_VERSION_3_7_12}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "Maximum number of parallel heals SHD can do per " + "local brick. 
This can substantially lower heal times" + ", but can also crush your bricks if you don't have " + "the storage hardware to support this."}, + { + .key = {"shd-wait-qlength"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 655536, + .default_value = "1024", + .op_version = {GD_OP_VERSION_3_7_12}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This option can be used to control number of heals" + " that can wait in SHD per subvolume", + }, + { + .key = {"locking-scheme"}, + .type = GF_OPTION_TYPE_STR, + .value = {"full", "granular"}, + .default_value = "full", + .op_version = {GD_OP_VERSION_3_7_12}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "If this option is set to granular, self-heal will " + "stop being compatible with afr-v1, which helps afr " + "be more granular while self-healing", + }, + {.key = {"full-lock"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes", + .op_version = {GD_OP_VERSION_3_13_2}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "If this option is disabled, then the IOs will take " + "range locks same as versions till 3.13.1."}, + { + .key = {"granular-entry-heal"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "If this option is enabled, self-heal will resort to " + "granular way of recording changelogs and doing entry " + "self-heal.", + }, + { + .key = {"favorite-child-policy"}, + .type = GF_OPTION_TYPE_STR, + .value = {"none", "size", "ctime", "mtime", "majority"}, + .default_value = "none", + .op_version = {GD_OP_VERSION_3_7_12}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This option can be used to automatically resolve " + "split-brains using various policies without user " + "intervention. \"size\" picks the file with the " + "biggest size as the source. \"ctime\" and \"mtime\" " + "pick the file with the latest ctime and mtime " + "respectively as the source. \"majority\" picks a file" + " with identical mtime and size in more than half the " + "number of bricks in the replica.", + }, + { + .key = {"consistent-io"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .description = "If this option is enabled, i/o will fail even if " + "one of the bricks is down in the replicas", + }, + {.key = {"use-compound-fops"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_3_8_4}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, + .description = "This option exists only for backward compatibility " + "and configuring it doesn't have any effect"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 129acbef0b5..f43a10bec2f 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. 
*/ - /* TODO: add NS locking */ #include "glusterfs.h" @@ -27,258 +26,237 @@ int run_defrag = 0; - - -int dht_link2 (xlator_t *this, xlator_t *dst_node, call_frame_t *frame, - int ret); - int -dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, - int ret); +dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); int -dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, - int ret); - +dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret); int -dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this); - +dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); int -dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata); +dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this); int -dht_set_file_xattr_req (xlator_t *this, loc_t *loc, dict_t *xattr_req); +dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata); int -dht_set_dir_xattr_req (xlator_t *this, loc_t *loc, dict_t *xattr_req); +dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req); int -dht_do_fresh_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc); +dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req); +int +dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc); /* Sets the blocks and size values to fixed values. This is to be called * only for dirs. The caller is responsible for checking the type */ -int32_t dht_set_fixed_dir_stat (struct iatt *stat) +int32_t +dht_set_fixed_dir_stat(struct iatt *stat) { - if (stat) { - stat->ia_blocks = DHT_DIR_STAT_BLOCKS; - stat->ia_size = DHT_DIR_STAT_SIZE; - return 0; - } - return -1; + if (stat) { + stat->ia_blocks = DHT_DIR_STAT_BLOCKS; + stat->ia_size = DHT_DIR_STAT_SIZE; + return 0; + } + return -1; } - /* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY * Use DHT_MODE_IN_XDATA_KEY if available. 
Else fall back to * DHT_IATT_IN_XDATA_KEY */ -int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req) +int +dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req) { - int ret = -1; + int ret = -1; - ret = dict_set_int8 (xattr_req, DHT_MODE_IN_XDATA_KEY, 1); - ret = dict_set_int8 (xattr_req, DHT_IATT_IN_XDATA_KEY, 1); + ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1); + ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1); - /* At least one call succeeded */ - return ret; + /* At least one call succeeded */ + return ret; } - /* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to * DHT_IATT_IN_XDATA_KEY * This will return a dummy iatt with only the mode and type set */ -int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, - struct iatt *stbuf) +int +dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf) { - int ret = -1; - int32_t mode = 0; + int ret = -1; + int32_t mode = 0; - ret = dict_get_int32 (xdata, DHT_MODE_IN_XDATA_KEY, &mode); + ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode); - if (ret) { - ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, - (void **)&stbuf); - } else { - stbuf->ia_prot = ia_prot_from_st_mode (mode); - stbuf->ia_type = ia_type_from_st_mode (mode); - } + if (ret) { + ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); + } else { + stbuf->ia_prot = ia_prot_from_st_mode(mode); + stbuf->ia_type = ia_type_from_st_mode(mode); + } - return ret; + return ret; } - - int -dht_rmdir_unlock (call_frame_t *frame, xlator_t *this); - -char *xattrs_to_heal[] = { - "user.", - POSIX_ACL_ACCESS_XATTR, - POSIX_ACL_DEFAULT_XATTR, - QUOTA_LIMIT_KEY, - QUOTA_LIMIT_OBJECTS_KEY, - GF_SELINUX_XATTR_KEY, - NULL -}; +dht_rmdir_unlock(call_frame_t *frame, xlator_t *this); +char *xattrs_to_heal[] = {"user.", + POSIX_ACL_ACCESS_XATTR, + POSIX_ACL_DEFAULT_XATTR, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + GF_SELINUX_XATTR_KEY, + NULL}; -char *dht_dbg_vxattrs[] = { - DHT_DBG_HASHED_SUBVOL_PATTERN, - NULL -}; +char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; /* Return true if key exists in array -*/ + */ static gf_boolean_t -dht_match_xattr (const char *key) +dht_match_xattr(const char *key) { - return gf_get_index_by_elem (xattrs_to_heal, (char *)key) >= 0; + return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0; } int -dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value) +dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value) { - int ret = -1; - quota_meta_t *meta_dst = NULL; - quota_meta_t *meta_src = NULL; - int64_t *size = NULL; - int64_t dst_dir_count = 0; - int64_t src_dir_count = 0; - - if (value == NULL) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_DATA_NULL, "data value is NULL"); - ret = -1; - goto out; - } + int ret = -1; + quota_meta_t *meta_dst = NULL; + quota_meta_t *meta_src = NULL; + int64_t *size = NULL; + int64_t dst_dir_count = 0; + int64_t src_dir_count = 0; - ret = dict_get_bin (dst, key, (void **)&meta_dst); + if (value == NULL) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL, + "data value is NULL"); + ret = -1; + goto out; + } + + ret = dict_get_bin(dst, key, (void **)&meta_dst); + if (ret < 0) { + meta_dst = GF_CALLOC(1, sizeof(quota_meta_t), gf_common_quota_meta_t); + if (meta_dst == NULL) { + gf_msg("dht", GF_LOG_WARNING, ENOMEM, DHT_MSG_NO_MEMORY, + "Memory allocation failed"); + ret = -1; + goto out; + } + ret = dict_set_bin(dst, key, meta_dst, 
sizeof(quota_meta_t)); if (ret < 0) { - meta_dst = GF_CALLOC (1, sizeof (quota_meta_t), - gf_common_quota_meta_t); - if (meta_dst == NULL) { - gf_msg ("dht", GF_LOG_WARNING, ENOMEM, - DHT_MSG_NO_MEMORY, - "Memory allocation failed"); - ret = -1; - goto out; - } - ret = dict_set_bin (dst, key, meta_dst, - sizeof (quota_meta_t)); - if (ret < 0) { - gf_msg ("dht", GF_LOG_WARNING, EINVAL, - DHT_MSG_DICT_SET_FAILED, - "dht aggregate dict set failed"); - GF_FREE (meta_dst); - ret = -1; - goto out; - } + gf_msg("dht", GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED, + "dht aggregate dict set failed"); + GF_FREE(meta_dst); + ret = -1; + goto out; } + } - if (value->len > sizeof (int64_t)) { - meta_src = data_to_bin (value); + if (value->len > sizeof(int64_t)) { + meta_src = data_to_bin(value); - meta_dst->size = hton64 (ntoh64 (meta_dst->size) + - ntoh64 (meta_src->size)); - meta_dst->file_count = hton64 (ntoh64 (meta_dst->file_count) + - ntoh64 (meta_src->file_count)); + meta_dst->size = hton64(ntoh64(meta_dst->size) + + ntoh64(meta_src->size)); + meta_dst->file_count = hton64(ntoh64(meta_dst->file_count) + + ntoh64(meta_src->file_count)); - if (value->len > (2 * sizeof (int64_t))) { - dst_dir_count = ntoh64 (meta_dst->dir_count); - src_dir_count = ntoh64 (meta_src->dir_count); + if (value->len > (2 * sizeof(int64_t))) { + dst_dir_count = ntoh64(meta_dst->dir_count); + src_dir_count = ntoh64(meta_src->dir_count); - if (src_dir_count > dst_dir_count) - meta_dst->dir_count = meta_src->dir_count; - } else { - meta_dst->dir_count = 0; - } + if (src_dir_count > dst_dir_count) + meta_dst->dir_count = meta_src->dir_count; } else { - size = data_to_bin (value); - meta_dst->size = hton64 (ntoh64 (meta_dst->size) + - ntoh64 (*size)); + meta_dst->dir_count = 0; } + } else { + size = data_to_bin(value); + meta_dst->size = hton64(ntoh64(meta_dst->size) + ntoh64(*size)); + } - ret = 0; + ret = 0; out: - return ret; + return ret; } - -int add_opt(char **optsp, const char *opt) +int +add_opt(char **optsp, const char *opt) { - char *newopts = NULL; - unsigned oldsize = 0; - unsigned newsize = 0; + char *newopts = NULL; + unsigned oldsize = 0; + unsigned newsize = 0; - if (*optsp == NULL) - newopts = gf_strdup (opt); - else { - oldsize = strlen (*optsp); - newsize = oldsize + 1 + strlen (opt) + 1; - newopts = GF_REALLOC (*optsp, newsize); - if (newopts) - sprintf (newopts + oldsize, ",%s", opt); - } - if (newopts == NULL) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_NO_MEMORY, - "Error to add choices in buffer in add_opt"); - return -1; - } - *optsp = newopts; - return 0; + if (*optsp == NULL) + newopts = gf_strdup(opt); + else { + oldsize = strlen(*optsp); + newsize = oldsize + 1 + strlen(opt) + 1; + newopts = GF_REALLOC(*optsp, newsize); + if (newopts) + sprintf(newopts + oldsize, ",%s", opt); + } + if (newopts == NULL) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY, + "Error to add choices in buffer in add_opt"); + return -1; + } + *optsp = newopts; + return 0; } /* Return Choice list from Split brain status */ static char * -getChoices (const char *value) +getChoices(const char *value) { - int i = 0; - char *ptr = NULL; - char *tok = NULL; - char *result = NULL; - char *newval = NULL; + int i = 0; + char *ptr = NULL; + char *tok = NULL; + char *result = NULL; + char *newval = NULL; - ptr = strstr (value, "Choices:"); - if (!ptr) { - result = ptr; - goto out; - } + ptr = strstr(value, "Choices:"); + if (!ptr) { + result = ptr; + goto out; + } - newval = gf_strdup (ptr); - if (!newval) { - result = 
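/* Editor's note (illustrative sketch, not part of this patch): the
 * aggregation rule implemented by dht_aggregate_quota_xattr() above,
 * restated on a simplified host-order struct. In the real code the xattr
 * value is stored in network byte order (hton64/ntoh64) and may omit the
 * file_count/dir_count fields depending on the value length; the struct
 * and function names below are hypothetical. */
#include <stdint.h>

struct quota_usage {              /* simplified stand-in for quota_meta_t */
    uint64_t size;
    uint64_t file_count;
    uint64_t dir_count;
};

static void
quota_usage_aggregate(struct quota_usage *dst, const struct quota_usage *src)
{
    dst->size += src->size;                  /* sizes and file counts add up  */
    dst->file_count += src->file_count;      /* across subvolumes             */
    if (src->dir_count > dst->dir_count)     /* dir count: take the maximum,  */
        dst->dir_count = src->dir_count;     /* a dir exists on every subvol  */
}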
newval; - goto out; - } + newval = gf_strdup(ptr); + if (!newval) { + result = newval; + goto out; + } - tok = strtok (newval, ":"); - if (!tok) { - result = tok; - goto out; - } + tok = strtok(newval, ":"); + if (!tok) { + result = tok; + goto out; + } - while (tok) { - i++; - if (i == 2) - break; - tok = strtok (NULL, ":"); - } + while (tok) { + i++; + if (i == 2) + break; + tok = strtok(NULL, ":"); + } - result = gf_strdup (tok); + result = gf_strdup(tok); out: - if (newval) - GF_FREE (newval); + if (newval) + GF_FREE(newval); - return result; + return result; } /* This function prepare a list of choices for key @@ -291,248 +269,226 @@ out: */ int -dht_aggregate_split_brain_xattr (dict_t *dst, char *key, data_t *value) +dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value) { + int ret = 0; + char *oldvalue = NULL; + char *old_choice = NULL; + char *new_choice = NULL; + char *full_choice = NULL; + char *status = NULL; + + if (value == NULL) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL, + "GF_AFR_SBRAIN_STATUS value is NULL"); + ret = -1; + goto out; + } + + ret = dict_get_str(dst, key, &oldvalue); + if (ret) + goto out; - int ret = 0; - char *oldvalue = NULL; - char *old_choice = NULL; - char *new_choice = NULL; - char *full_choice = NULL; - char *status = NULL; + /* skip code that is irrelevant if !oldvalue */ + if (!oldvalue) + goto out; - if (value == NULL) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_DATA_NULL, - "GF_AFR_SBRAIN_STATUS value is NULL"); + if (strstr(oldvalue, "not")) { + gf_msg_debug("dht", 0, "Need to update split-brain status in dict"); + ret = -1; + goto out; + } + if (strstr(oldvalue, "metadata-split-brain:yes") && + (strstr(oldvalue, "data-split-brain:no"))) { + if (strstr(value->data, "not")) { + gf_msg_debug("dht", 0, "No need to update split-brain status"); + ret = 0; + goto out; + } + if (strstr(value->data, "yes") && + (strncmp(oldvalue, value->data, strlen(oldvalue)))) { + old_choice = getChoices(oldvalue); + if (!old_choice) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY, + "Error to get choices"); ret = -1; goto out; - } + } - ret = dict_get_str (dst, key, &oldvalue); - if (ret) + ret = add_opt(&full_choice, old_choice); + if (ret) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY, + "Error to add choices"); + ret = -1; goto out; + } - /* skip code that is irrelevant if !oldvalue */ - if (!oldvalue) + new_choice = getChoices(value->data); + if (!new_choice) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY, + "Error to get choices"); + ret = -1; goto out; + } - if (strstr (oldvalue, "not")) { - gf_msg_debug ("dht", 0, - "Need to update split-brain status in dict"); + ret = add_opt(&full_choice, new_choice); + if (ret) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY, + "Error to add choices "); ret = -1; goto out; + } + ret = gf_asprintf(&status, + "data-split-brain:%s " + "metadata-split-brain:%s Choices:%s", + "no", "yes", full_choice); + + if (-1 == ret) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY, + "Error to prepare status "); + goto out; + } + ret = dict_set_dynstr(dst, key, status); + if (ret) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set full choice"); + } } - if (strstr (oldvalue, "metadata-split-brain:yes") - && (strstr (oldvalue, "data-split-brain:no"))) { - if (strstr (value->data, "not")) { - gf_msg_debug ("dht", 0, - "No need to update split-brain status"); - ret = 0; - goto out; - } - if (strstr (value->data, "yes") && - (strncmp (oldvalue, 
value->data, strlen(oldvalue)))) { - old_choice = getChoices (oldvalue); - if (!old_choice) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_NO_MEMORY, - "Error to get choices"); - ret = -1; - goto out; - } - - ret = add_opt (&full_choice, old_choice); - if (ret) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_NO_MEMORY, - "Error to add choices"); - ret = -1; - goto out; - } - - new_choice = getChoices (value->data); - if (!new_choice) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_NO_MEMORY, - "Error to get choices"); - ret = -1; - goto out; - } - - ret = add_opt (&full_choice, new_choice); - if (ret) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_NO_MEMORY, - "Error to add choices "); - ret = -1; - goto out; - } - ret = gf_asprintf (&status, - "data-split-brain:%s " - "metadata-split-brain:%s Choices:%s", - "no", "yes", full_choice); - - if (-1 == ret) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_NO_MEMORY, - "Error to prepare status "); - goto out; - } - ret = dict_set_dynstr (dst, key, status); - if (ret) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set full choice"); - } - } - } + } out: - if (old_choice) - GF_FREE (old_choice); - if (new_choice) - GF_FREE (new_choice); - if (full_choice) - GF_FREE (full_choice); + if (old_choice) + GF_FREE(old_choice); + if (new_choice) + GF_FREE(new_choice); + if (full_choice) + GF_FREE(full_choice); - return ret; + return ret; } - - int -dht_aggregate (dict_t *this, char *key, data_t *value, void *data) +dht_aggregate(dict_t *this, char *key, data_t *value, void *data) { - dict_t *dst = NULL; - int32_t ret = -1; - data_t *dict_data = NULL; + dict_t *dst = NULL; + int32_t ret = -1; + data_t *dict_data = NULL; - dst = data; + dst = data; - /* compare split brain xattr only */ - if (strcmp (key, GF_AFR_SBRAIN_STATUS) == 0) { - ret = dht_aggregate_split_brain_xattr(dst, key, value); + /* compare split brain xattr only */ + if (strcmp(key, GF_AFR_SBRAIN_STATUS) == 0) { + ret = dht_aggregate_split_brain_xattr(dst, key, value); + if (!ret) + goto out; + } else if (strcmp(key, QUOTA_SIZE_KEY) == 0) { + ret = dht_aggregate_quota_xattr(dst, key, value); + if (ret) { + gf_msg("dht", GF_LOG_WARNING, 0, + DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED, + "Failed to aggregate quota xattr"); + } + goto out; + } else if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) { + ret = gf_get_min_stime(THIS, dst, key, value); + goto out; + } else { + /* compare user xattrs only */ + if (!strncmp(key, "user.", SLEN("user."))) { + ret = dict_lookup(dst, key, &dict_data); + if (!ret && dict_data && value) { + ret = is_data_equal(dict_data, value); if (!ret) - goto out; - } else if (strcmp (key, QUOTA_SIZE_KEY) == 0) { - ret = dht_aggregate_quota_xattr (dst, key, value); - if (ret) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED, - "Failed to aggregate quota xattr"); - } - goto out; - } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) { - ret = gf_get_min_stime (THIS, dst, key, value); - goto out; - } else { - /* compare user xattrs only */ - if (!strncmp (key, "user.", SLEN ("user."))) { - ret = dict_lookup (dst, key, &dict_data); - if (!ret && dict_data && value) { - ret = is_data_equal (dict_data, value); - if (!ret) - gf_msg_debug ("dht", 0, - "xattr mismatch for %s", - key); - } - } + gf_msg_debug("dht", 0, "xattr mismatch for %s", key); + } } + } - ret = dict_set (dst, key, value); - if (ret) { - gf_msg ("dht", GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary 
value: key = %s", - key); - } + ret = dict_set(dst, key, value); + if (ret) { + gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s", key); + } out: - return ret; + return ret; } - void -dht_aggregate_xattr (dict_t *dst, dict_t *src) +dht_aggregate_xattr(dict_t *dst, dict_t *src) { - if ((dst == NULL) || (src == NULL)) { - goto out; - } + if ((dst == NULL) || (src == NULL)) { + goto out; + } - dict_foreach (src, dht_aggregate, dst); + dict_foreach(src, dht_aggregate, dst); out: - return; + return; } /* Code to save hashed subvol on inode ctx as a mds subvol -*/ + */ int -dht_inode_ctx_mdsvol_set (inode_t *inode, xlator_t *this, xlator_t *mds_subvol) +dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol) { - dht_inode_ctx_t *ctx = NULL; - int ret = -1; - uint64_t ctx_int = 0; - gf_boolean_t ctx_free = _gf_false; - + dht_inode_ctx_t *ctx = NULL; + int ret = -1; + uint64_t ctx_int = 0; + gf_boolean_t ctx_free = _gf_false; - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this , &ctx_int); - if (ctx_int) { - ctx = (dht_inode_ctx_t *)ctx_int; - ctx->mds_subvol = mds_subvol; - } else { - ctx = GF_CALLOC (1, sizeof(*ctx), gf_dht_mt_inode_ctx_t); - if (!ctx) - goto unlock; - ctx->mds_subvol = mds_subvol; - ctx_free = _gf_true; - ctx_int = (long) ctx; - ret = __inode_ctx_set (inode, this, &ctx_int); - } - } + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctx_int); + if (ctx_int) { + ctx = (dht_inode_ctx_t *)ctx_int; + ctx->mds_subvol = mds_subvol; + } else { + ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t); + if (!ctx) + goto unlock; + ctx->mds_subvol = mds_subvol; + ctx_free = _gf_true; + ctx_int = (long)ctx; + ret = __inode_ctx_set(inode, this, &ctx_int); + } + } unlock: - UNLOCK (&inode->lock); - if (ret && ctx_free) - GF_FREE (ctx); - return ret; + UNLOCK(&inode->lock); + if (ret && ctx_free) + GF_FREE(ctx); + return ret; } /*Code to get mds subvol from inode ctx */ int -dht_inode_ctx_mdsvol_get (inode_t *inode, xlator_t *this, xlator_t **mdsvol) +dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol) { - dht_inode_ctx_t *ctx = NULL; - int ret = -1; + dht_inode_ctx_t *ctx = NULL; + int ret = -1; - if (!mdsvol) - return ret; + if (!mdsvol) + return ret; - if (__is_root_gfid(inode->gfid)) { - (*mdsvol) = FIRST_CHILD (this); - return 0; - } + if (__is_root_gfid(inode->gfid)) { + (*mdsvol) = FIRST_CHILD(this); + return 0; + } - ret = dht_inode_ctx_get (inode, this, &ctx); + ret = dht_inode_ctx_get(inode, this, &ctx); - if (!ret && ctx) { - if (ctx->mds_subvol) { - *mdsvol = ctx->mds_subvol; - ret = 0; - } else { - ret = -1; - } + if (!ret && ctx) { + if (ctx->mds_subvol) { + *mdsvol = ctx->mds_subvol; + ret = 0; + } else { + ret = -1; } + } - return ret; + return ret; } - - - - /* TODO: - use volumename in xattr instead of "dht" - use NS locks @@ -540,293 +496,277 @@ dht_inode_ctx_mdsvol_get (inode_t *inode, xlator_t *this, xlator_t **mdsvol) - complete linkfile selfheal */ - int -dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; - int ret = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + int ret = -1; - GF_VALIDATE_OR_GOTO ("dht", frame, out); - 
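/* Editor's note (illustrative sketch, not part of this patch): the
 * lock / get-or-allocate / update pattern used by dht_inode_ctx_mdsvol_set()
 * above, restated with plain pthread locking and hypothetical types so it
 * compiles standalone. The real code stores the context pointer as a
 * uint64_t via __inode_ctx_get()/__inode_ctx_set() under inode->lock and
 * frees the allocation if the set fails. */
#include <pthread.h>
#include <stdlib.h>

struct node {                     /* stand-in for inode_t */
    pthread_mutex_t lock;
    void *ctx;                    /* per-translator context slot */
};

struct mds_ctx {                  /* stand-in for dht_inode_ctx_t */
    void *mds_subvol;
};

static int
node_ctx_mdsvol_set(struct node *n, void *mds_subvol)
{
    int ret = 0;
    struct mds_ctx *ctx;

    pthread_mutex_lock(&n->lock);
    ctx = n->ctx;
    if (!ctx) {                           /* first caller allocates the ctx */
        ctx = calloc(1, sizeof(*ctx));
        if (!ctx) {
            ret = -1;
            goto unlock;
        }
        n->ctx = ctx;
    }
    ctx->mds_subvol = mds_subvol;         /* later callers just update it */
unlock:
    pthread_mutex_unlock(&n->lock);
    return ret;
}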
GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); - local = frame->local; - conf = this->private; - ret = op_ret; + local = frame->local; + conf = this->private; + ret = op_ret; - FRAME_SU_UNDO (frame, dht_local_t); + FRAME_SU_UNDO(frame, dht_local_t); - if (ret == 0) { - layout = local->selfheal.layout; - ret = dht_layout_set (this, local->inode, layout); - } + if (ret == 0) { + layout = local->selfheal.layout; + ret = dht_layout_set(this, local->inode, layout); + } - dht_inode_ctx_time_update (local->inode, this, &local->stbuf, 1); - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } + dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, &local->postparent, + 1); + } - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - dht_set_fixed_dir_stat (&local->postparent); - /* Delete mds xattr at the time of STACK UNWIND */ - GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); + /* Delete mds xattr at the time of STACK UNWIND */ + GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr); - DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode, - &local->stbuf, local->xattr, &local->postparent); + DHT_STACK_UNWIND(lookup, frame, ret, local->op_errno, local->inode, + &local->stbuf, local->xattr, &local->postparent); out: - return ret; -} - -int -dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) -{ - dht_local_t *local = NULL; - dht_local_t *heal_local = NULL; - call_frame_t *main_frame = NULL; - call_frame_t *heal_frame = NULL; - int op_errno = 0; - int ret = -1; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; - uint32_t vol_commit_hash = 0; - xlator_t *source = NULL; - int heal_path = 0; - int error_while_marking_mds = 0; - int i = 0; - loc_t loc = {0 }; - int8_t is_read_only = 0, layout_anomalies = 0; - char gfid_local[GF_UUID_BUF_SIZE] = {0}; - - local = discover_frame->local; - layout = local->layout; - conf = this->private; - gf_uuid_unparse(local->gfid, gfid_local); - - LOCK(&discover_frame->lock); - { - main_frame = local->main_frame; - local->main_frame = NULL; - } - UNLOCK(&discover_frame->lock); - - if (!main_frame) - return 0; - - /* Code to update all extended attributed from - subvol to local->xattr on that internal xattr has found - */ - if (conf->subvolume_cnt == 1) - local->need_xattr_heal = 0; - if (local->need_xattr_heal && (local->mds_xattr)) { - dht_dir_set_heal_xattr (this, local, local->xattr, - local->mds_xattr, NULL, NULL); - dict_unref (local->mds_xattr); - local->mds_xattr = NULL; - } - - ret = dict_get_int8 (local->xattr_req, QUOTA_READ_ONLY_KEY, - &is_read_only); - if (ret < 0) - gf_msg_debug (this->name, 0, "key = %s not present in dict", - QUOTA_READ_ONLY_KEY); - - if (local->file_count && local->dir_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_FILE_TYPE_MISMATCH, - "path %s exists as a file on one subvolume " - "and directory on another. 
" - "Please fix it manually", - local->loc.path); - op_errno = EIO; - goto out; - } - - if (local->cached_subvol) { - ret = dht_layout_preset (this, local->cached_subvol, - local->inode); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to set layout for subvolume %s", - local->cached_subvol ? local->cached_subvol->name : ""); - op_errno = EINVAL; - goto out; - } - } else { - ret = dht_layout_normalize (this, &local->loc, layout); - if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) { - /* either the layout is incorrect or the directory is - * not found even in one subvolume. - */ - gf_msg_debug (this->name, 0, - "normalizing failed on %s " - "(overlaps/holes present: %s, " - "ENOENT errors: %d)", local->loc.path, - (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0); - layout_anomalies = 1; - } else if (local->inode) { - dht_layout_set (this, local->inode, layout); - } - } + return ret; +} + +int +dht_discover_complete(xlator_t *this, call_frame_t *discover_frame) +{ + dht_local_t *local = NULL; + dht_local_t *heal_local = NULL; + call_frame_t *main_frame = NULL; + call_frame_t *heal_frame = NULL; + int op_errno = 0; + int ret = -1; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + uint32_t vol_commit_hash = 0; + xlator_t *source = NULL; + int heal_path = 0; + int error_while_marking_mds = 0; + int i = 0; + loc_t loc = {0}; + int8_t is_read_only = 0, layout_anomalies = 0; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + + local = discover_frame->local; + layout = local->layout; + conf = this->private; + gf_uuid_unparse(local->gfid, gfid_local); + + LOCK(&discover_frame->lock); + { + main_frame = local->main_frame; + local->main_frame = NULL; + } + UNLOCK(&discover_frame->lock); + + if (!main_frame) + return 0; + + /* Code to update all extended attributed from + subvol to local->xattr on that internal xattr has found + */ + if (conf->subvolume_cnt == 1) + local->need_xattr_heal = 0; + if (local->need_xattr_heal && (local->mds_xattr)) { + dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, + NULL, NULL); + dict_unref(local->mds_xattr); + local->mds_xattr = NULL; + } + + ret = dict_get_int8(local->xattr_req, QUOTA_READ_ONLY_KEY, &is_read_only); + if (ret < 0) + gf_msg_debug(this->name, 0, "key = %s not present in dict", + QUOTA_READ_ONLY_KEY); + + if (local->file_count && local->dir_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH, + "path %s exists as a file on one subvolume " + "and directory on another. " + "Please fix it manually", + local->loc.path); + op_errno = EIO; + goto out; + } - if (!conf->vch_forced) { - ret = dict_get_uint32 (local->xattr, - conf->commithash_xattr_name, - &vol_commit_hash); - if (ret == 0) { - conf->vol_commit_hash = vol_commit_hash; - } + if (local->cached_subvol) { + ret = dht_layout_preset(this, local->cached_subvol, local->inode); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SET_FAILED, + "failed to set layout for subvolume %s", + local->cached_subvol ? local->cached_subvol->name : ""); + op_errno = EINVAL; + goto out; + } + } else { + ret = dht_layout_normalize(this, &local->loc, layout); + if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) { + /* either the layout is incorrect or the directory is + * not found even in one subvolume. + */ + gf_msg_debug(this->name, 0, + "normalizing failed on %s " + "(overlaps/holes present: %s, " + "ENOENT errors: %d)", + local->loc.path, (ret < 0) ? "yes" : "no", + (ret > 0) ? 
ret : 0); + layout_anomalies = 1; + } else if (local->inode) { + dht_layout_set(this, local->inode, layout); + } + } + + if (!conf->vch_forced) { + ret = dict_get_uint32(local->xattr, conf->commithash_xattr_name, + &vol_commit_hash); + if (ret == 0) { + conf->vol_commit_hash = vol_commit_hash; } + } - if (IA_ISDIR (local->stbuf.ia_type) && !is_read_only) { - for (i = 0; i < layout->cnt; i++) { - if (!source && !layout->list[i].err) - source = layout->list[i].xlator; - if (layout->list[i].err == ENOENT || - layout->list[i].err == ESTALE) { - heal_path = 1; - } - - if (source && heal_path) - break; - } - } + if (IA_ISDIR(local->stbuf.ia_type) && !is_read_only) { + for (i = 0; i < layout->cnt; i++) { + if (!source && !layout->list[i].err) + source = layout->list[i].xlator; + if (layout->list[i].err == ENOENT || + layout->list[i].err == ESTALE) { + heal_path = 1; + } - if (IA_ISDIR (local->stbuf.ia_type)) { - /* Call function to save hashed subvol on inode ctx if - internal mds xattr is not present and all subvols are up - */ - if (!local->op_ret && !__is_root_gfid (local->stbuf.ia_gfid)) - (void) dht_common_mark_mdsxattr (discover_frame, - &error_while_marking_mds, 1); - - if (local->need_xattr_heal && !heal_path) { - local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal (this, local); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, - ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "xattr heal failed for " - "directory gfid is %s ", - gfid_local); - } + if (source && heal_path) + break; } + } - if (source && (heal_path || layout_anomalies || error_while_marking_mds)) { - gf_uuid_copy (loc.gfid, local->gfid); - if (gf_uuid_is_null (loc.gfid)) { - goto done; - } - - if (local->inode) - loc.inode = inode_ref (local->inode); - else - goto done; - - heal_frame = create_frame (this, this->ctx->pool); - if (heal_frame) { - heal_local = dht_local_init (heal_frame, &loc, - NULL, 0); - if (!heal_local) - goto cleanup; - - gf_uuid_copy (heal_local->gfid, local->gfid); - heal_frame->cookie = source; - heal_local->xattr = dict_ref (local->xattr); - heal_local->stbuf = local->stbuf; - heal_local->postparent = local->postparent; - heal_local->inode = inode_ref (loc.inode); - heal_local->main_frame = main_frame; - FRAME_SU_DO (heal_frame, dht_local_t); - ret = synctask_new (this->ctx->env, - dht_heal_full_path, - dht_heal_full_path_done, - heal_frame, heal_frame); - if (!ret) { - loc_wipe (&loc); - return 0; - } - /* - * Failed to spawn the synctask. Returning - * with out doing heal. 
- */ -cleanup: - loc_wipe (&loc); - DHT_STACK_DESTROY (heal_frame); - } - - } + if (IA_ISDIR(local->stbuf.ia_type)) { + /* Call function to save hashed subvol on inode ctx if + internal mds xattr is not present and all subvols are up + */ + if (!local->op_ret && !__is_root_gfid(local->stbuf.ia_gfid)) + (void)dht_common_mark_mdsxattr(discover_frame, + &error_while_marking_mds, 1); + + if (local->need_xattr_heal && !heal_path) { + local->need_xattr_heal = 0; + ret = dht_dir_xattr_heal(this, local); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "xattr heal failed for " + "directory gfid is %s ", + gfid_local); + } + } + + if (source && (heal_path || layout_anomalies || error_while_marking_mds)) { + gf_uuid_copy(loc.gfid, local->gfid); + if (gf_uuid_is_null(loc.gfid)) { + goto done; + } + + if (local->inode) + loc.inode = inode_ref(local->inode); + else + goto done; + + heal_frame = create_frame(this, this->ctx->pool); + if (heal_frame) { + heal_local = dht_local_init(heal_frame, &loc, NULL, 0); + if (!heal_local) + goto cleanup; + + gf_uuid_copy(heal_local->gfid, local->gfid); + heal_frame->cookie = source; + heal_local->xattr = dict_ref(local->xattr); + heal_local->stbuf = local->stbuf; + heal_local->postparent = local->postparent; + heal_local->inode = inode_ref(loc.inode); + heal_local->main_frame = main_frame; + FRAME_SU_DO(heal_frame, dht_local_t); + ret = synctask_new(this->ctx->env, dht_heal_full_path, + dht_heal_full_path_done, heal_frame, heal_frame); + if (!ret) { + loc_wipe(&loc); + return 0; + } + /* + * Failed to spawn the synctask. Returning + * with out doing heal. + */ + cleanup: + loc_wipe(&loc); + DHT_STACK_DESTROY(heal_frame); + } + } done: - dht_set_fixed_dir_stat (&local->postparent); - /* Delete mds xattr at the time of STACK UNWIND */ - if (local->xattr) - GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + dht_set_fixed_dir_stat(&local->postparent); + /* Delete mds xattr at the time of STACK UNWIND */ + if (local->xattr) + GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr); - DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xattr, - &local->postparent); - return 0; + DHT_STACK_UNWIND(lookup, main_frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + return 0; out: - DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL, - NULL); + DHT_STACK_UNWIND(lookup, main_frame, -1, op_errno, NULL, NULL, NULL, NULL); - return ret; + return ret; } int -dht_common_mark_mdsxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - dict_t *xdata) +dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = cookie; - int ret = -1; - dht_conf_t *conf = 0; - dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + xlator_t *prev = cookie; + int ret = -1; + dht_conf_t *conf = 0; + dht_layout_t *layout = NULL; - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); - local = frame->local; - conf = this->private; - layout = local->selfheal.layout; + local = frame->local; + conf = this->private; + layout = local->selfheal.layout; - if (op_ret) { - gf_msg_debug (this->name, op_ret, - "Failed to set %s on the MDS %s 
for path %s. ", - conf->mds_xattr_key, prev->name, local->loc.path); - } else { - /* Save mds subvol on inode ctx */ - ret = dht_inode_ctx_mdsvol_set (local->inode, this, prev); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set mds subvol on inode ctx" - " %s for %s ", prev->name, - local->loc.path); - } - } - if (!local->mds_heal_fresh_lookup && layout) { - dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, - 0xffffffff, layout); - } + if (op_ret) { + gf_msg_debug(this->name, op_ret, + "Failed to set %s on the MDS %s for path %s. ", + conf->mds_xattr_key, prev->name, local->loc.path); + } else { + /* Save mds subvol on inode ctx */ + ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set mds subvol on inode ctx" + " %s for %s ", + prev->name, local->loc.path); + } + } + if (!local->mds_heal_fresh_lookup && layout) { + dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffffff, + layout); + } out: - if (local && local->mds_heal_fresh_lookup) - DHT_STACK_DESTROY (frame); - return 0; + if (local && local->mds_heal_fresh_lookup) + DHT_STACK_DESTROY(frame); + return 0; } - - /* Common function call by revalidate/selfheal code path to populate internal xattr if it is not present, mark_during_fresh_lookup value determines either function is call by revalidate_cbk(discover_complete) @@ -842,1508 +782,1416 @@ out: penalty. */ int -dht_common_mark_mdsxattr (call_frame_t *frame, int *errst, int mark_during_fresh_lookup) -{ - dht_local_t *local = NULL; - xlator_t *this = NULL; - xlator_t *hashed_subvol = NULL; - int ret = 0; - int i = 0; - dict_t *xattrs = NULL; - char gfid_local[GF_UUID_BUF_SIZE] = {0,}; - int32_t zero[1] = {0}; - dht_conf_t *conf = 0; - dht_layout_t *layout = NULL; - dht_local_t *copy_local = NULL; - call_frame_t *xattr_frame = NULL; - gf_boolean_t vol_down = _gf_false; - - this = frame->this; - - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame->local, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - local = frame->local; - conf = this->private; - layout = local->selfheal.layout; - local->mds_heal_fresh_lookup = mark_during_fresh_lookup; - gf_uuid_unparse(local->gfid, gfid_local); - - /* Code to update hashed subvol consider as a mds subvol - and wind a setxattr call on hashed subvol to update - internal xattr +dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, + int mark_during_fresh_lookup) +{ + dht_local_t *local = NULL; + xlator_t *this = NULL; + xlator_t *hashed_subvol = NULL; + int ret = 0; + int i = 0; + dict_t *xattrs = NULL; + char gfid_local[GF_UUID_BUF_SIZE] = { + 0, + }; + int32_t zero[1] = {0}; + dht_conf_t *conf = 0; + dht_layout_t *layout = NULL; + dht_local_t *copy_local = NULL; + call_frame_t *xattr_frame = NULL; + gf_boolean_t vol_down = _gf_false; + + this = frame->this; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + local = frame->local; + conf = this->private; + layout = local->selfheal.layout; + local->mds_heal_fresh_lookup = mark_during_fresh_lookup; + gf_uuid_unparse(local->gfid, gfid_local); + + /* Code to update hashed subvol consider as a mds subvol + and wind a setxattr call on hashed subvol to update + internal xattr + */ + if 
(!local->xattr || !dict_get(local->xattr, conf->mds_xattr_key)) { + /* It means no internal MDS xattr has been set yet + */ + /* Check the status of all subvol are up while call + this function call by lookup code path */ - if (!local->xattr || !dict_get (local->xattr, conf->mds_xattr_key)) { - /* It means no internal MDS xattr has been set yet - */ - /* Check the status of all subvol are up while call - this function call by lookup code path - */ - if (mark_during_fresh_lookup) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->subvolume_status[i]) { - vol_down = _gf_true; - break; - } - } - if (vol_down) { - gf_msg_debug (this->name, 0, - "subvol %s is down. Unable to " - " save mds subvol on inode for " - " path %s gfid is %s " , - conf->subvolumes[i]->name, - local->loc.path, gfid_local); - goto out; - } + if (mark_during_fresh_lookup) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) { + vol_down = _gf_true; + break; } + } + if (vol_down) { + gf_msg_debug(this->name, 0, + "subvol %s is down. Unable to " + " save mds subvol on inode for " + " path %s gfid is %s ", + conf->subvolumes[i]->name, local->loc.path, + gfid_local); + goto out; + } + } - /* Calculate hashed subvol based on inode and parent node - */ - hashed_subvol = dht_inode_get_hashed_subvol (local->inode, this, - &local->loc); - if (!hashed_subvol) { - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get hashed subvol for path %s" - "gfid is %s ", - local->loc.path, gfid_local); - (*errst) = 1; - ret = -1; - goto out; - } - xattrs = dict_new (); - if (!xattrs) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "dict_new failed"); - ret = -1; - goto out; - } - /* Add internal MDS xattr on disk for hashed subvol - */ - ret = dht_dict_set_array (xattrs, conf->mds_xattr_key, - zero, 1); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary" - " value:key = %s for " - "path %s", conf->mds_xattr_key, - local->loc.path); - ret = -1; - goto out; - } - /* Create a new frame to wind a call only while - this function call by revalidate_cbk code path - To wind a call parallel need to create a new frame - */ - if (mark_during_fresh_lookup) { - xattr_frame = create_frame (this, this->ctx->pool); - if (!xattr_frame) { - ret = -1; - goto out; - } - copy_local = dht_local_init (xattr_frame, &(local->loc), - NULL, 0); - if (!copy_local) { - ret = -1; - DHT_STACK_DESTROY (xattr_frame); - goto out; - } - copy_local->stbuf = local->stbuf; - copy_local->mds_heal_fresh_lookup = mark_during_fresh_lookup; - if (!copy_local->inode) - copy_local->inode = inode_ref (local->inode); - gf_uuid_copy (copy_local->loc.gfid, local->gfid); - FRAME_SU_DO (xattr_frame, dht_local_t); - STACK_WIND_COOKIE (xattr_frame, dht_common_mark_mdsxattr_cbk, - hashed_subvol, hashed_subvol, - hashed_subvol->fops->setxattr, - &local->loc, xattrs, 0, NULL); - } else { - STACK_WIND_COOKIE (frame, - dht_common_mark_mdsxattr_cbk, - (void *)hashed_subvol, - hashed_subvol, - hashed_subvol->fops->setxattr, - &local->loc, xattrs, 0, - NULL); - } - } else { - gf_msg_debug (this->name, 0, - "internal xattr %s is present on subvol" - "on path %s gfid is %s " , conf->mds_xattr_key, - local->loc.path, gfid_local); - if (!mark_during_fresh_lookup) - dht_selfheal_dir_setattr (frame, &local->loc, - &local->stbuf, 0xffffffff, - layout); + /* Calculate hashed subvol based on inode and parent node + */ + hashed_subvol = 
dht_inode_get_hashed_subvol(local->inode, this, + &local->loc); + if (!hashed_subvol) { + gf_msg(this->name, GF_LOG_DEBUG, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get hashed subvol for path %s" + "gfid is %s ", + local->loc.path, gfid_local); + (*errst) = 1; + ret = -1; + goto out; + } + xattrs = dict_new(); + if (!xattrs) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "dict_new failed"); + ret = -1; + goto out; } + /* Add internal MDS xattr on disk for hashed subvol + */ + ret = dht_dict_set_array(xattrs, conf->mds_xattr_key, zero, 1); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary" + " value:key = %s for " + "path %s", + conf->mds_xattr_key, local->loc.path); + ret = -1; + goto out; + } + /* Create a new frame to wind a call only while + this function call by revalidate_cbk code path + To wind a call parallel need to create a new frame + */ + if (mark_during_fresh_lookup) { + xattr_frame = create_frame(this, this->ctx->pool); + if (!xattr_frame) { + ret = -1; + goto out; + } + copy_local = dht_local_init(xattr_frame, &(local->loc), NULL, 0); + if (!copy_local) { + ret = -1; + DHT_STACK_DESTROY(xattr_frame); + goto out; + } + copy_local->stbuf = local->stbuf; + copy_local->mds_heal_fresh_lookup = mark_during_fresh_lookup; + if (!copy_local->inode) + copy_local->inode = inode_ref(local->inode); + gf_uuid_copy(copy_local->loc.gfid, local->gfid); + FRAME_SU_DO(xattr_frame, dht_local_t); + STACK_WIND_COOKIE(xattr_frame, dht_common_mark_mdsxattr_cbk, + hashed_subvol, hashed_subvol, + hashed_subvol->fops->setxattr, &local->loc, + xattrs, 0, NULL); + } else { + STACK_WIND_COOKIE(frame, dht_common_mark_mdsxattr_cbk, + (void *)hashed_subvol, hashed_subvol, + hashed_subvol->fops->setxattr, &local->loc, + xattrs, 0, NULL); + } + } else { + gf_msg_debug(this->name, 0, + "internal xattr %s is present on subvol" + "on path %s gfid is %s ", + conf->mds_xattr_key, local->loc.path, gfid_local); + if (!mark_during_fresh_lookup) + dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, + 0xffffffff, layout); + } out: - if (xattrs) - dict_unref (xattrs); - return ret; -} + if (xattrs) + dict_unref(xattrs); + return ret; +} + +int +dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *postparent) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; + int32_t check_mds = 0; + int is_linkfile = 0; + int attempt_unwind = 0; + dht_conf_t *conf = 0; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char gfid_node[GF_UUID_BUF_SIZE] = {0}; + int32_t mds_xattr_val[1] = {0}; + int errst = 0; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", this->private, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + + local = frame->local; + prev = cookie; + conf = this->private; + + layout = local->layout; + + /* Check if the gfid is different for file from other node */ + if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) { + gf_uuid_unparse(stbuf->ia_gfid, gfid_node); + gf_uuid_unparse(local->gfid, gfid_local); + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH, + "%s: gfid different on %s, gfid local = %s" + "gfid other = %s", + local->loc.path, prev->name, gfid_local, gfid_node); + } -int 
-dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - dht_layout_t *layout = NULL; - int ret = -1; - int is_dir = 0; - int32_t check_mds = 0; - int is_linkfile = 0; - int attempt_unwind = 0; - dht_conf_t *conf = 0; - char gfid_local[GF_UUID_BUF_SIZE] = {0}; - char gfid_node[GF_UUID_BUF_SIZE] = {0}; - int32_t mds_xattr_val[1] = {0}; - int errst = 0; + LOCK(&frame->lock); + { + /* TODO: assert equal mode on stbuf->st_mode and + local->stbuf->st_mode + else mkdir/chmod/chown and fix + */ - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", this->private, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); + ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED, + "%s: failed to merge layouts for subvol %s", local->loc.path, + prev->name); - local = frame->local; - prev = cookie; - conf = this->private; + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, + "lookup of %s on %s returned error", local->loc.path, + prev->name); - layout = local->layout; + goto unlock; + } + is_linkfile = check_is_linkfile(inode, stbuf, xattr, + conf->link_xattr_name); + is_dir = check_is_dir(inode, stbuf, xattr); - /* Check if the gfid is different for file from other node */ - if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) { + if (is_dir) { + local->dir_count++; + } else { + local->file_count++; + + if (!is_linkfile && !local->cached_subvol) { + /* real file */ + /* Ok, we somehow managed to find a file on + * more than one subvol. ignore this or we + * will end up overwriting information while a + * a thread is potentially unwinding from + * dht_discover_complete + */ + local->cached_subvol = prev; + attempt_unwind = 1; + } else { + goto unlock; + } + } - gf_uuid_unparse(stbuf->ia_gfid, gfid_node); - gf_uuid_unparse(local->gfid, gfid_local); + local->op_ret = 0; - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_GFID_MISMATCH, - "%s: gfid different on %s, gfid local = %s" - "gfid other = %s", - local->loc.path, prev->name, - gfid_local, gfid_node); + if (local->xattr == NULL) { + local->xattr = dict_ref(xattr); + } else { + /* Don't aggregate for files. 
See BZ#1484113 */ + if (is_dir) + dht_aggregate_xattr(local->xattr, xattr); } + if (local->inode == NULL) + local->inode = inode_ref(inode); - LOCK (&frame->lock); - { - /* TODO: assert equal mode on stbuf->st_mode and - local->stbuf->st_mode - - else mkdir/chmod/chown and fix - */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->postparent, postparent); - ret = dht_layout_merge (this, layout, prev, - op_ret, op_errno, xattr); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_MERGE_FAILED, - "%s: failed to merge layouts for subvol %s", - local->loc.path, prev->name); - - if (op_ret == -1) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "lookup of %s on %s returned error", - local->loc.path, prev->name); - - goto unlock; - } - - is_linkfile = check_is_linkfile (inode, stbuf, xattr, - conf->link_xattr_name); - is_dir = check_is_dir (inode, stbuf, xattr); - - if (is_dir) { - local->dir_count ++; - } else { - local->file_count ++; - - if (!is_linkfile && !local->cached_subvol) { - /* real file */ - /* Ok, we somehow managed to find a file on - * more than one subvol. ignore this or we - * will end up overwriting information while a - * a thread is potentially unwinding from - * dht_discover_complete - */ - local->cached_subvol = prev; - attempt_unwind = 1; - } else { - goto unlock; - } - } - - local->op_ret = 0; - - if (local->xattr == NULL) { - local->xattr = dict_ref (xattr); - } else { - /* Don't aggregate for files. See BZ#1484113 */ - if (is_dir) - dht_aggregate_xattr (local->xattr, xattr); - } - - if (local->inode == NULL) - local->inode = inode_ref (inode); - - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->postparent, postparent); - - if (!dict_get (xattr, conf->mds_xattr_key)) { - goto unlock; - } else { - gf_msg_debug (this->name, 0, - "internal xattr %s is present on subvol" - "on path %s gfid is %s " , - conf->mds_xattr_key, - local->loc.path, gfid_local); - } - check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, - mds_xattr_val, 1, &errst); - /* save mds subvol on inode ctx */ - ret = dht_inode_ctx_mdsvol_set (local->inode, this, - prev); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set hashed subvol for %s vol is %s", - local->loc.path, prev->name); - } - - if ((check_mds < 0) && !errst) { - local->mds_xattr = dict_ref (xattr); - gf_msg_debug (this->name, 0, - "Value of %s is not zero on mds subvol" - "so xattr needs to be healed on non mds" - " path is %s and vol name is %s " - " gfid is %s" , - conf->mds_xattr_key, - local->loc.path, - prev->name, gfid_local); - local->need_xattr_heal = 1; - local->mds_subvol = prev; - } - - } + if (!dict_get(xattr, conf->mds_xattr_key)) { + goto unlock; + } else { + gf_msg_debug(this->name, 0, + "internal xattr %s is present on subvol" + "on path %s gfid is %s ", + conf->mds_xattr_key, local->loc.path, gfid_local); + } + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); + /* save mds subvol on inode ctx */ + ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set hashed subvol for %s vol is %s", + local->loc.path, prev->name); + } + + if ((check_mds < 0) && !errst) { + local->mds_xattr = dict_ref(xattr); + gf_msg_debug(this->name, 0, + "Value of %s is not zero on mds subvol" + "so xattr needs to be healed on non mds" + " path is %s and vol name is %s 
" + " gfid is %s", + conf->mds_xattr_key, local->loc.path, prev->name, + gfid_local); + local->need_xattr_heal = 1; + local->mds_subvol = prev; + } + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); out: - /* Make sure, the thread executing dht_discover_complete is the one - * which calls STACK_DESTROY (frame). In the case of "attempt_unwind", - * this makes sure that the thread don't call dht_frame_return, till - * call to dht_discover_complete is done. - */ - if (attempt_unwind) { - dht_discover_complete (this, frame); - } + /* Make sure, the thread executing dht_discover_complete is the one + * which calls STACK_DESTROY (frame). In the case of "attempt_unwind", + * this makes sure that the thread don't call dht_frame_return, till + * call to dht_discover_complete is done. + */ + if (attempt_unwind) { + dht_discover_complete(this, frame); + } - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt) && !attempt_unwind) { - dht_discover_complete (this, frame); - } + if (is_last_call(this_call_cnt) && !attempt_unwind) { + dht_discover_complete(this, frame); + } - if (is_last_call (this_call_cnt)) - DHT_STACK_DESTROY (frame); + if (is_last_call(this_call_cnt)) + DHT_STACK_DESTROY(frame); - return 0; + return 0; } - int -dht_do_discover (call_frame_t *frame, xlator_t *this, loc_t *loc) +dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc) { - int ret; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int call_cnt = 0; - int op_errno = EINVAL; - int i = 0; - call_frame_t *discover_frame = NULL; + int ret; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int call_cnt = 0; + int op_errno = EINVAL; + int i = 0; + call_frame_t *discover_frame = NULL; - conf = this->private; - local = frame->local; + conf = this->private; + local = frame->local; - ret = dht_set_file_xattr_req (this, loc, local->xattr_req); - if (ret) { - goto err; - } + ret = dht_set_file_xattr_req(this, loc, local->xattr_req); + if (ret) { + goto err; + } - ret = dht_set_dir_xattr_req (this, loc, local->xattr_req); - if (ret) { - goto err; - } + ret = dht_set_dir_xattr_req(this, loc, local->xattr_req); + if (ret) { + goto err; + } - if (loc_is_root (loc)) { - ret = dict_set_uint32 (local->xattr_req, - conf->commithash_xattr_name, - sizeof(uint32_t)); - } + if (loc_is_root(loc)) { + ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name, + sizeof(uint32_t)); + } - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - local->layout = dht_layout_new (this, conf->subvolume_cnt); + local->layout = dht_layout_new(this, conf->subvolume_cnt); - if (!local->layout) { - op_errno = ENOMEM; - goto err; - } + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } - gf_uuid_copy (local->gfid, loc->gfid); + gf_uuid_copy(local->gfid, loc->gfid); - discover_frame = copy_frame (frame); - if (!discover_frame) { - op_errno = ENOMEM; - goto err; - } + discover_frame = copy_frame(frame); + if (!discover_frame) { + op_errno = ENOMEM; + goto err; + } - discover_frame->local = local; - frame->local = NULL; - local->main_frame = frame; + discover_frame->local = local; + frame->local = NULL; + local->main_frame = frame; - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (discover_frame, dht_discover_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } + for (i = 0; i < call_cnt; i++) { + 
STACK_WIND_COOKIE(discover_frame, dht_discover_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + local->xattr_req); + } - return 0; + return 0; err: - DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, - NULL); + DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; + return 0; } /* Get the value of key from dict in the bytewise and save in array after convert from network byte order to host byte order */ int32_t -dht_dict_get_array (dict_t *dict, char *key, int32_t value[], int32_t size, int *errst) -{ - void *ptr = NULL; - int32_t len = -1; - int32_t vindex = -1; - int32_t err = -1; - int ret = 0; - - if (dict == NULL) { - (*errst) = -1; - return -EINVAL; - } - err = dict_get_ptr_and_len(dict, key, &ptr, &len); - if (err != 0) { - (*errst) = -1; - return err; - } - - if (len != (size * sizeof (int32_t))) { - (*errst) = -1; - return -EINVAL; - } - - for (vindex = 0; vindex < size; vindex++) { - value[vindex] = ntoh32(*((int32_t *)ptr + vindex)); - if (value[vindex] < 0) - ret = -1; - } - - return ret; +dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size, + int *errst) +{ + void *ptr = NULL; + int32_t len = -1; + int32_t vindex = -1; + int32_t err = -1; + int ret = 0; + + if (dict == NULL) { + (*errst) = -1; + return -EINVAL; + } + err = dict_get_ptr_and_len(dict, key, &ptr, &len); + if (err != 0) { + (*errst) = -1; + return err; + } + + if (len != (size * sizeof(int32_t))) { + (*errst) = -1; + return -EINVAL; + } + + for (vindex = 0; vindex < size; vindex++) { + value[vindex] = ntoh32(*((int32_t *)ptr + vindex)); + if (value[vindex] < 0) + ret = -1; + } + + return ret; } - /* Code to call syntask to heal custom xattr from hashed subvol to non hashed subvol */ int -dht_dir_xattr_heal (xlator_t *this, dht_local_t *local) +dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) { - dht_local_t *copy_local = NULL; - call_frame_t *copy = NULL; - int ret = -1; - char gfid_local[GF_UUID_BUF_SIZE] = {0}; - - if (gf_uuid_is_null (local->gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "No gfid exists for path %s " - "so healing xattr is not possible", - local->loc.path); - goto out; - } + dht_local_t *copy_local = NULL; + call_frame_t *copy = NULL; + int ret = -1; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; - gf_uuid_unparse(local->gfid, gfid_local); - copy = create_frame (this, this->ctx->pool); - if (copy) { - copy_local = dht_local_init (copy, &(local->loc), NULL, 0); - if (!copy_local) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Memory allocation failed " - "for path %s gfid %s ", - local->loc.path, gfid_local); - DHT_STACK_DESTROY (copy); - } else { - copy_local->stbuf = local->stbuf; - gf_uuid_copy (copy_local->loc.gfid, local->gfid); - copy_local->mds_subvol = local->mds_subvol; - FRAME_SU_DO (copy, dht_local_t); - ret = synctask_new (this->ctx->env, dht_dir_heal_xattrs, - dht_dir_heal_xattrs_done, - copy, copy); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Synctask creation failed to heal xattr " - "for path %s gfid %s ", - local->loc.path, gfid_local); - DHT_STACK_DESTROY (copy); - } - } + if (gf_uuid_is_null(local->gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED, + "No gfid exists for path %s " + "so healing xattr is not possible", + local->loc.path); + goto out; + } + + gf_uuid_unparse(local->gfid, gfid_local); + copy = 
create_frame(this, this->ctx->pool); + if (copy) { + copy_local = dht_local_init(copy, &(local->loc), NULL, 0); + if (!copy_local) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "Memory allocation failed " + "for path %s gfid %s ", + local->loc.path, gfid_local); + DHT_STACK_DESTROY(copy); + } else { + copy_local->stbuf = local->stbuf; + gf_uuid_copy(copy_local->loc.gfid, local->gfid); + copy_local->mds_subvol = local->mds_subvol; + FRAME_SU_DO(copy, dht_local_t); + ret = synctask_new(this->ctx->env, dht_dir_heal_xattrs, + dht_dir_heal_xattrs_done, copy, copy); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "Synctask creation failed to heal xattr " + "for path %s gfid %s ", + local->loc.path, gfid_local); + DHT_STACK_DESTROY(copy); + } } + } out: - return ret; -} - + return ret; +} + +int +dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *postparent) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; + int32_t check_mds = 0; + int errst = 0; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char gfid_node[GF_UUID_BUF_SIZE] = {0}; + int32_t mds_xattr_val[1] = {0}; + call_frame_t *copy = NULL; + dht_local_t *copy_local = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", this->private, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + + local = frame->local; + prev = cookie; + conf = this->private; + + layout = local->layout; + + if (!op_ret && gf_uuid_is_null(local->gfid)) { + memcpy(local->gfid, stbuf->ia_gfid, 16); + } + if (!gf_uuid_is_null(local->gfid)) { + gf_uuid_unparse(local->gfid, gfid_local); + } + /* Check if the gfid is different for file from other node */ + if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) { + gf_uuid_unparse(stbuf->ia_gfid, gfid_node); -int -dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) -{ - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - dht_layout_t *layout = NULL; - int ret = -1; - int is_dir = 0; - int32_t check_mds = 0; - int errst = 0; - char gfid_local[GF_UUID_BUF_SIZE] = {0}; - char gfid_node[GF_UUID_BUF_SIZE] = {0}; - int32_t mds_xattr_val[1] = {0}; - call_frame_t *copy = NULL; - dht_local_t *copy_local = NULL; + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH, + "%s: gfid different on %s." 
+ " gfid local = %s, gfid subvol = %s", + local->loc.path, prev->name, gfid_local, gfid_node); + } - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", this->private, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); + LOCK(&frame->lock); + { + /* TODO: assert equal mode on stbuf->st_mode and + local->stbuf->st_mode - local = frame->local; - prev = cookie; - conf = this->private; + else mkdir/chmod/chown and fix + */ + ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr); - layout = local->layout; + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, + "lookup of %s on %s returned error", local->loc.path, + prev->name); - if (!op_ret && gf_uuid_is_null (local->gfid)) { - memcpy (local->gfid, stbuf->ia_gfid, 16); + goto unlock; } - if (!gf_uuid_is_null(local->gfid)) { - gf_uuid_unparse(local->gfid, gfid_local); - } - - /* Check if the gfid is different for file from other node */ - if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) { - gf_uuid_unparse(stbuf->ia_gfid, gfid_node); + is_dir = check_is_dir(inode, stbuf, xattr); + if (!is_dir) { + gf_msg_debug(this->name, 0, + "lookup of %s on %s returned non" + "dir 0%o" + "calling lookup_everywhere", + local->loc.path, prev->name, stbuf->ia_type); - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_GFID_MISMATCH, - "%s: gfid different on %s." - " gfid local = %s, gfid subvol = %s", - local->loc.path, prev->name, - gfid_local, gfid_node); + local->need_selfheal = 1; + goto unlock; } - LOCK (&frame->lock); - { - /* TODO: assert equal mode on stbuf->st_mode and - local->stbuf->st_mode - - else mkdir/chmod/chown and fix - */ - ret = dht_layout_merge (this, layout, prev, op_ret, op_errno, - xattr); - - if (op_ret == -1) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "lookup of %s on %s returned error", - local->loc.path, prev->name); - - goto unlock; - } - - is_dir = check_is_dir (inode, stbuf, xattr); - if (!is_dir) { - - gf_msg_debug (this->name, 0, - "lookup of %s on %s returned non" - "dir 0%o" - "calling lookup_everywhere", - local->loc.path, prev->name, - stbuf->ia_type); - - local->need_selfheal = 1; - goto unlock; - } - - local->op_ret = 0; - if (local->xattr == NULL) { - local->xattr = dict_ref (xattr); - } else { - dht_aggregate_xattr (local->xattr, xattr); - } - - if (dict_get (xattr, conf->mds_xattr_key)) { - local->mds_subvol = prev; - local->mds_stbuf.ia_gid = stbuf->ia_gid; - local->mds_stbuf.ia_uid = stbuf->ia_uid; - local->mds_stbuf.ia_prot = stbuf->ia_prot; - } + local->op_ret = 0; + if (local->xattr == NULL) { + local->xattr = dict_ref(xattr); + } else { + dht_aggregate_xattr(local->xattr, xattr); + } - if (local->stbuf.ia_type != IA_INVAL) { - if (!__is_root_gfid (stbuf->ia_gfid) && - ((local->stbuf.ia_gid != stbuf->ia_gid) || - (local->stbuf.ia_uid != stbuf->ia_uid) || - (is_permission_different (&local->stbuf.ia_prot, - &stbuf->ia_prot)))) { - local->need_attrheal = 1; - } - } + if (dict_get(xattr, conf->mds_xattr_key)) { + local->mds_subvol = prev; + local->mds_stbuf.ia_gid = stbuf->ia_gid; + local->mds_stbuf.ia_uid = stbuf->ia_uid; + local->mds_stbuf.ia_prot = stbuf->ia_prot; + } - if (local->inode == NULL) - local->inode = inode_ref (inode); + if (local->stbuf.ia_type != IA_INVAL) { + if (!__is_root_gfid(stbuf->ia_gfid) && + ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid) || + 
(is_permission_different(&local->stbuf.ia_prot, + &stbuf->ia_prot)))) { + local->need_attrheal = 1; + } + } - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->postparent, postparent); + if (local->inode == NULL) + local->inode = inode_ref(inode); - if (!dict_get (xattr, conf->mds_xattr_key)) { - gf_msg_debug (this->name, 0, - "Internal xattr %s is not present " - " on path %s gfid is %s " , - conf->mds_xattr_key, - local->loc.path, gfid_local); - goto unlock; - } else { - /* Save mds subvol on inode ctx */ - ret = dht_inode_ctx_mdsvol_set (local->inode, this, - prev); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set hashed subvol for %s vol is %s", - local->loc.path, prev->name); - } - } - check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, - mds_xattr_val, 1, &errst); - if ((check_mds < 0) && !errst) { - local->mds_xattr = dict_ref (xattr); - gf_msg_debug (this->name, 0, - "Value of %s is not zero on hashed subvol " - "so xattr needs to be heal on non hashed" - " path is %s and vol name is %s " - " gfid is %s" , - conf->mds_xattr_key, - local->loc.path, - prev->name, gfid_local); - local->need_xattr_heal = 1; - local->mds_subvol = prev; - } + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->postparent, postparent); + if (!dict_get(xattr, conf->mds_xattr_key)) { + gf_msg_debug(this->name, 0, + "Internal xattr %s is not present " + " on path %s gfid is %s ", + conf->mds_xattr_key, local->loc.path, gfid_local); + goto unlock; + } else { + /* Save mds subvol on inode ctx */ + ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set hashed subvol for %s vol is %s", + local->loc.path, prev->name); + } } + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); + if ((check_mds < 0) && !errst) { + local->mds_xattr = dict_ref(xattr); + gf_msg_debug(this->name, 0, + "Value of %s is not zero on hashed subvol " + "so xattr needs to be heal on non hashed" + " path is %s and vol name is %s " + " gfid is %s", + conf->mds_xattr_key, local->loc.path, prev->name, + gfid_local); + local->need_xattr_heal = 1; + local->mds_subvol = prev; + } + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); + this_call_cnt = dht_frame_return(frame); - this_call_cnt = dht_frame_return (frame); - - if (is_last_call (this_call_cnt)) { - /* No need to call xattr heal code if volume count is 1 - */ - if (conf->subvolume_cnt == 1) - local->need_xattr_heal = 0; + if (is_last_call(this_call_cnt)) { + /* No need to call xattr heal code if volume count is 1 + */ + if (conf->subvolume_cnt == 1) + local->need_xattr_heal = 0; - /* Code to update all extended attributed from hashed subvol - to local->xattr - */ - if (local->need_xattr_heal && (local->mds_xattr)) { - dht_dir_set_heal_xattr (this, local, local->xattr, - local->mds_xattr, NULL, NULL); - dict_unref (local->mds_xattr); - local->mds_xattr = NULL; - } + /* Code to update all extended attributed from hashed subvol + to local->xattr + */ + if (local->need_xattr_heal && (local->mds_xattr)) { + dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, + NULL, NULL); + dict_unref(local->mds_xattr); + local->mds_xattr = NULL; + } - if (local->need_selfheal) { - local->need_selfheal = 0; - dht_lookup_everywhere (frame, this, &local->loc); - return 0; - } + if (local->need_selfheal) { + local->need_selfheal = 0; + 
dht_lookup_everywhere(frame, this, &local->loc); + return 0; + } - if (local->op_ret == 0) { - ret = dht_layout_normalize (this, &local->loc, layout); + if (local->op_ret == 0) { + ret = dht_layout_normalize(this, &local->loc, layout); - if (ret != 0) { - gf_msg_debug (this->name, 0, - "fixing assignment on %s", - local->loc.path); - goto selfheal; - } + if (ret != 0) { + gf_msg_debug(this->name, 0, "fixing assignment on %s", + local->loc.path); + goto selfheal; + } - dht_layout_set (this, local->inode, layout); - if (!dict_get (local->xattr, conf->mds_xattr_key) || - local->need_xattr_heal) - goto selfheal; - } + dht_layout_set(this, local->inode, layout); + if (!dict_get(local->xattr, conf->mds_xattr_key) || + local->need_xattr_heal) + goto selfheal; + } - if (local->inode) { - dht_inode_ctx_time_update (local->inode, this, - &local->stbuf, 1); - } + if (local->inode) { + dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); + } - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); + } - if (local->need_attrheal) { - local->need_attrheal = 0; - if (!__is_root_gfid (inode->gfid)) { - local->stbuf.ia_gid = local->mds_stbuf.ia_gid; - local->stbuf.ia_uid = local->mds_stbuf.ia_uid; - local->stbuf.ia_prot = local->mds_stbuf.ia_prot; - } - copy = create_frame (this, this->ctx->pool); - if (copy) { - copy_local = dht_local_init (copy, &local->loc, - NULL, 0); - if (!copy_local) { - DHT_STACK_DESTROY (copy); - goto skip_attr_heal; - } - copy_local->stbuf = local->stbuf; - gf_uuid_copy (copy_local->loc.gfid, - local->stbuf.ia_gfid); - copy_local->mds_stbuf = local->mds_stbuf; - copy_local->mds_subvol = local->mds_subvol; - copy->local = copy_local; - FRAME_SU_DO (copy, dht_local_t); - ret = synctask_new (this->ctx->env, - dht_dir_attr_heal, - dht_dir_attr_heal_done, - copy, copy); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_DIR_ATTR_HEAL_FAILED, - "Synctask creation failed to heal attr " - "for path %s gfid %s ", - local->loc.path, local->gfid); - DHT_STACK_DESTROY (copy); - } - } + if (local->need_attrheal) { + local->need_attrheal = 0; + if (!__is_root_gfid(inode->gfid)) { + local->stbuf.ia_gid = local->mds_stbuf.ia_gid; + local->stbuf.ia_uid = local->mds_stbuf.ia_uid; + local->stbuf.ia_prot = local->mds_stbuf.ia_prot; + } + copy = create_frame(this, this->ctx->pool); + if (copy) { + copy_local = dht_local_init(copy, &local->loc, NULL, 0); + if (!copy_local) { + DHT_STACK_DESTROY(copy); + goto skip_attr_heal; + } + copy_local->stbuf = local->stbuf; + gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid); + copy_local->mds_stbuf = local->mds_stbuf; + copy_local->mds_subvol = local->mds_subvol; + copy->local = copy_local; + FRAME_SU_DO(copy, dht_local_t); + ret = synctask_new(this->ctx->env, dht_dir_attr_heal, + dht_dir_attr_heal_done, copy, copy); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_DIR_ATTR_HEAL_FAILED, + "Synctask creation failed to heal attr " + "for path %s gfid %s ", + local->loc.path, local->gfid); + DHT_STACK_DESTROY(copy); } - -skip_attr_heal: - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - dht_set_fixed_dir_stat (&local->postparent); - /* Delete mds xattr at the time of STACK UNWIND */ - if (local->xattr) - GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, 
local->xattr, - &local->postparent); + } } - return 0; + skip_attr_heal: + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); + /* Delete mds xattr at the time of STACK UNWIND */ + if (local->xattr) + GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr); + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + } + + return 0; selfheal: - FRAME_SU_DO (frame, dht_local_t); - ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk, - &local->loc, layout); + FRAME_SU_DO(frame, dht_local_t); + ret = dht_selfheal_directory(frame, dht_lookup_selfheal_cbk, &local->loc, + layout); out: - return ret; -} + return ret; +} + +int +is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2) +{ + if ((prot1->owner.read != prot2->owner.read) || + (prot1->owner.write != prot2->owner.write) || + (prot1->owner.exec != prot2->owner.exec) || + (prot1->group.read != prot2->group.read) || + (prot1->group.write != prot2->group.write) || + (prot1->group.exec != prot2->group.exec) || + (prot1->other.read != prot2->other.read) || + (prot1->other.write != prot2->other.write) || + (prot1->other.exec != prot2->other.exec) || + (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) || + (prot1->sticky != prot2->sticky)) { + return 1; + } else { + return 0; + } +} + +int +dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *postparent) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int is_dir = 0; + int is_linkfile = 0; + int follow_link = 0; + call_frame_t *copy = NULL; + dht_local_t *copy_local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + uint32_t vol_commit_hash = 0; + xlator_t *subvol = NULL; + int32_t check_mds = 0; + int errst = 0; + int32_t mds_xattr_val[1] = {0}; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, err); + GF_VALIDATE_OR_GOTO("dht", frame->local, err); + GF_VALIDATE_OR_GOTO("dht", cookie, err); + + local = frame->local; + prev = cookie; + conf = this->private; + if (!conf) + goto out; -int -is_permission_different (ia_prot_t *prot1, ia_prot_t *prot2) -{ - if ((prot1->owner.read != prot2->owner.read) || - (prot1->owner.write != prot2->owner.write) || - (prot1->owner.exec != prot2->owner.exec) || - (prot1->group.read != prot2->group.read) || - (prot1->group.write != prot2->group.write) || - (prot1->group.exec != prot2->group.exec) || - (prot1->other.read != prot2->other.read) || - (prot1->other.write != prot2->other.write) || - (prot1->other.exec != prot2->other.exec) || - (prot1->suid != prot2->suid) || - (prot1->sgid != prot2->sgid) || - (prot1->sticky != prot2->sticky)) { - return 1; - } else { - return 0; + if (!conf->vch_forced) { + ret = dict_get_uint32(xattr, conf->commithash_xattr_name, + &vol_commit_hash); + if (ret == 0) { + conf->vol_commit_hash = vol_commit_hash; } -} - -int -dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - int is_dir = 0; - int is_linkfile = 0; - int follow_link = 0; - call_frame_t *copy = NULL; - dht_local_t 
*copy_local = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - uint32_t vol_commit_hash = 0; - xlator_t *subvol = NULL; - int32_t check_mds = 0; - int errst = 0; - int32_t mds_xattr_val[1] = {0}; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, err); - GF_VALIDATE_OR_GOTO ("dht", frame->local, err); - GF_VALIDATE_OR_GOTO ("dht", cookie, err); + } - local = frame->local; - prev = cookie; - conf = this->private; - if (!conf) - goto out; + gf_uuid_unparse(local->loc.gfid, gfid); - if (!conf->vch_forced) { - ret = dict_get_uint32 (xattr, conf->commithash_xattr_name, - &vol_commit_hash); - if (ret == 0) { - conf->vol_commit_hash = vol_commit_hash; - } + LOCK(&frame->lock); + { + if (gf_uuid_is_null(local->gfid)) { + memcpy(local->gfid, local->loc.gfid, 16); } - gf_uuid_unparse (local->loc.gfid, gfid); + gf_msg_debug(this->name, op_errno, + "revalidate lookup of %s " + "returned with op_ret %d", + local->loc.path, op_ret); - LOCK (&frame->lock); - { - if (gf_uuid_is_null (local->gfid)) { - memcpy (local->gfid, local->loc.gfid, 16); - } + if (op_ret == -1) { + local->op_errno = op_errno; + + if ((op_errno != ENOTCONN) && (op_errno != ENOENT) && + (op_errno != ESTALE)) { + gf_msg(this->name, GF_LOG_INFO, op_errno, + DHT_MSG_REVALIDATE_CBK_INFO, + "Revalidate: subvolume %s for %s " + "(gfid = %s) returned -1", + prev->name, local->loc.path, gfid); + } + if (op_errno == ESTALE) { + /* propagate the ESTALE to parent. + * setting local->return_estale would send + * ESTALE to parent. */ + local->return_estale = 1; + } - gf_msg_debug (this->name, op_errno, - "revalidate lookup of %s " - "returned with op_ret %d", - local->loc.path, op_ret); - - if (op_ret == -1) { - local->op_errno = op_errno; - - if ((op_errno != ENOTCONN) - && (op_errno != ENOENT) - && (op_errno != ESTALE)) { - gf_msg (this->name, GF_LOG_INFO, op_errno, - DHT_MSG_REVALIDATE_CBK_INFO, - "Revalidate: subvolume %s for %s " - "(gfid = %s) returned -1", - prev->name, local->loc.path, - gfid); - } - if (op_errno == ESTALE) { - /* propagate the ESTALE to parent. - * setting local->return_estale would send - * ESTALE to parent. */ - local->return_estale = 1; - } - - /* if it is ENOENT, we may have to do a - * 'lookup_everywhere()' to make sure - * the file is not migrated */ - if (op_errno == ENOENT) { - if (IA_ISREG (local->loc.inode->ia_type)) { - - gf_msg_debug (this->name, 0, - "found ENOENT for %s. " - "Setting " - "need_lookup_everywhere" - " flag to 1", - local->loc.path); - - local->need_lookup_everywhere = 1; - } - } - goto unlock; + /* if it is ENOENT, we may have to do a + * 'lookup_everywhere()' to make sure + * the file is not migrated */ + if (op_errno == ENOENT) { + if (IA_ISREG(local->loc.inode->ia_type)) { + gf_msg_debug(this->name, 0, + "found ENOENT for %s. 
" + "Setting " + "need_lookup_everywhere" + " flag to 1", + local->loc.path); + + local->need_lookup_everywhere = 1; } + } + goto unlock; + } + + if ((!IA_ISINVAL(local->inode->ia_type)) && + stbuf->ia_type != local->inode->ia_type) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FILE_TYPE_MISMATCH, + "mismatching filetypes 0%o v/s 0%o for %s," + " gfid = %s", + (stbuf->ia_type), (local->inode->ia_type), local->loc.path, + gfid); - if ((!IA_ISINVAL(local->inode->ia_type)) && - stbuf->ia_type != local->inode->ia_type) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_FILE_TYPE_MISMATCH, - "mismatching filetypes 0%o v/s 0%o for %s," - " gfid = %s", - (stbuf->ia_type), (local->inode->ia_type), - local->loc.path, gfid); + local->op_ret = -1; + local->op_errno = EINVAL; + + goto unlock; + } - local->op_ret = -1; - local->op_errno = EINVAL; + layout = local->layout; - goto unlock; + is_dir = check_is_dir(inode, stbuf, xattr); + is_linkfile = check_is_linkfile(inode, stbuf, xattr, + conf->link_xattr_name); + if (is_linkfile) { + follow_link = 1; + goto unlock; + } + if (is_dir) { + ret = dht_dir_has_layout(xattr, conf->xattr_name); + if (ret >= 0) { + if (is_greater_time(local->stbuf.ia_ctime, + local->stbuf.ia_ctime_nsec, stbuf->ia_ctime, + stbuf->ia_ctime_nsec)) { + /* Choose source */ + local->prebuf.ia_gid = stbuf->ia_gid; + local->prebuf.ia_uid = stbuf->ia_uid; + + if (__is_root_gfid(stbuf->ia_gfid)) + local->prebuf.ia_prot = stbuf->ia_prot; + } + } + if (local->stbuf.ia_type != IA_INVAL) { + if ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid) || + is_permission_different(&local->stbuf.ia_prot, + &stbuf->ia_prot)) { + local->need_selfheal = 1; } + } - layout = local->layout; + if (!dict_get(xattr, conf->mds_xattr_key)) { + gf_msg_debug(this->name, 0, + "internal xattr %s is not present" + " on path %s gfid is %s ", + conf->mds_xattr_key, local->loc.path, gfid); + } else { + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); + local->mds_subvol = prev; + local->mds_stbuf.ia_gid = stbuf->ia_gid; + local->mds_stbuf.ia_uid = stbuf->ia_uid; + local->mds_stbuf.ia_prot = stbuf->ia_prot; - is_dir = check_is_dir (inode, stbuf, xattr); - is_linkfile = check_is_linkfile (inode, stbuf, xattr, - conf->link_xattr_name); - if (is_linkfile) { - follow_link = 1; - goto unlock; + /* save mds subvol on inode ctx */ + ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set MDS subvol for %s vol is %s", + local->loc.path, prev->name); } - if (is_dir) { - ret = dht_dir_has_layout (xattr, conf->xattr_name); - if (ret >= 0) { - if (is_greater_time(local->stbuf.ia_ctime, - local->stbuf.ia_ctime_nsec, - stbuf->ia_ctime, - stbuf->ia_ctime_nsec)) { - /* Choose source */ - local->prebuf.ia_gid = stbuf->ia_gid; - local->prebuf.ia_uid = stbuf->ia_uid; - - if (__is_root_gfid (stbuf->ia_gfid)) - local->prebuf.ia_prot = stbuf->ia_prot; - } - } - - if (local->stbuf.ia_type != IA_INVAL) - { - if ((local->stbuf.ia_gid != stbuf->ia_gid) || - (local->stbuf.ia_uid != stbuf->ia_uid) || - is_permission_different (&local->stbuf.ia_prot, - &stbuf->ia_prot)) { - local->need_selfheal = 1; - } - } - - if (!dict_get (xattr, conf->mds_xattr_key)) { - gf_msg_debug (this->name, 0, - "internal xattr %s is not present" - " on path %s gfid is %s " , - conf->mds_xattr_key, - local->loc.path, gfid); - } else { - check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, 
- mds_xattr_val, 1, &errst); - local->mds_subvol = prev; - local->mds_stbuf.ia_gid = stbuf->ia_gid; - local->mds_stbuf.ia_uid = stbuf->ia_uid; - local->mds_stbuf.ia_prot = stbuf->ia_prot; - - /* save mds subvol on inode ctx */ - ret = dht_inode_ctx_mdsvol_set (local->inode, this, - prev); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set MDS subvol for %s vol is %s", - local->loc.path, prev->name); - } - if ((check_mds < 0) && !errst) { - local->mds_xattr = dict_ref (xattr); - gf_msg_debug (this->name, 0, - "Value of %s is not zero on " - "hashed subvol so xattr needs to" - " be healed on non hashed" - " path is %s and vol name is %s " - " gfid is %s" , - conf->mds_xattr_key, - local->loc.path, - prev->name, gfid); - local->need_xattr_heal = 1; - } - } - ret = dht_layout_dir_mismatch (this, layout, - prev, &local->loc, - xattr); - if (ret != 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LAYOUT_MISMATCH, - "Mismatching layouts for %s, gfid = %s", - local->loc.path, gfid); - - local->layout_mismatch = 1; - - goto unlock; - } + if ((check_mds < 0) && !errst) { + local->mds_xattr = dict_ref(xattr); + gf_msg_debug(this->name, 0, + "Value of %s is not zero on " + "hashed subvol so xattr needs to" + " be healed on non hashed" + " path is %s and vol name is %s " + " gfid is %s", + conf->mds_xattr_key, local->loc.path, + prev->name, gfid); + local->need_xattr_heal = 1; } + } + ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc, + xattr); + if (ret != 0) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH, + "Mismatching layouts for %s, gfid = %s", local->loc.path, + gfid); + local->layout_mismatch = 1; - /* Update stbuf from the servers where layout is present. This - * is an indication that the server is not a newly added brick. - * Merging stbuf from newly added brick may result in the added - * brick being the source of heal for uid/gid */ - if (!is_dir || (is_dir && - dht_dir_has_layout (xattr, conf->xattr_name) >= 0) - || conf->subvolume_cnt == 1) { + goto unlock; + } + } - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->postparent, postparent); - } else { - /* copy the gfid anyway */ - gf_uuid_copy (local->stbuf.ia_gfid, stbuf->ia_gfid); - } + /* Update stbuf from the servers where layout is present. This + * is an indication that the server is not a newly added brick. 
+ * Merging stbuf from newly added brick may result in the added + * brick being the source of heal for uid/gid */ + if (!is_dir || + (is_dir && dht_dir_has_layout(xattr, conf->xattr_name) >= 0) || + conf->subvolume_cnt == 1) { + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->postparent, postparent); + } else { + /* copy the gfid anyway */ + gf_uuid_copy(local->stbuf.ia_gfid, stbuf->ia_gfid); + } - local->op_ret = 0; + local->op_ret = 0; - if (!local->xattr) { - local->xattr = dict_ref (xattr); - } else if (is_dir) { - dht_aggregate_xattr (local->xattr, xattr); - } + if (!local->xattr) { + local->xattr = dict_ref(xattr); + } else if (is_dir) { + dht_aggregate_xattr(local->xattr, xattr); } + } unlock: - UNLOCK (&frame->lock); - - if (follow_link) { - gf_uuid_copy (local->gfid, stbuf->ia_gfid); + UNLOCK(&frame->lock); - subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); - if (!subvol) { - op_errno = ESTALE; - local->op_ret = -1; - } else { + if (follow_link) { + gf_uuid_copy(local->gfid, stbuf->ia_gfid); - STACK_WIND_COOKIE (frame, dht_lookup_linkfile_cbk, - subvol, subvol, subvol->fops->lookup, - &local->loc, local->xattr_req); - return 0; - } + subvol = dht_linkfile_subvol(this, inode, stbuf, xattr); + if (!subvol) { + op_errno = ESTALE; + local->op_ret = -1; + } else { + STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol, + subvol->fops->lookup, &local->loc, + local->xattr_req); + return 0; } + } out: - this_call_cnt = dht_frame_return (frame); - - if (is_last_call (this_call_cnt)) { - if (!IA_ISDIR (local->stbuf.ia_type) - && (local->hashed_subvol != local->cached_subvol) - && (local->stbuf.ia_nlink == 1) - && (conf && conf->unhashed_sticky_bit)) { - local->stbuf.ia_prot.sticky = 1; - } - /* No need to call heal code if volume count is 1 + this_call_cnt = dht_frame_return(frame); + + if (is_last_call(this_call_cnt)) { + if (!IA_ISDIR(local->stbuf.ia_type) && + (local->hashed_subvol != local->cached_subvol) && + (local->stbuf.ia_nlink == 1) && + (conf && conf->unhashed_sticky_bit)) { + local->stbuf.ia_prot.sticky = 1; + } + /* No need to call heal code if volume count is 1 + */ + if (conf->subvolume_cnt == 1) + local->need_xattr_heal = 0; + + if (IA_ISDIR(local->stbuf.ia_type)) { + /* Code to update all extended attributed from hashed + subvol to local->xattr and call heal code to heal + custom xattr from hashed subvol to non-hashed subvol + */ + if (local->need_xattr_heal && (local->mds_xattr)) { + dht_dir_set_heal_xattr(this, local, local->xattr, + local->mds_xattr, NULL, NULL); + dict_unref(local->mds_xattr); + local->mds_xattr = NULL; + local->need_xattr_heal = 0; + ret = dht_dir_xattr_heal(this, local); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "xattr heal failed for directory %s " + " gfid %s ", + local->loc.path, gfid); + } else { + /* Call function to save hashed subvol on inode + ctx if internal mds xattr is not present and + all subvols are up */ - if (conf->subvolume_cnt == 1) - local->need_xattr_heal = 0; - - if (IA_ISDIR (local->stbuf.ia_type)) { - /* Code to update all extended attributed from hashed - subvol to local->xattr and call heal code to heal - custom xattr from hashed subvol to non-hashed subvol - */ - if (local->need_xattr_heal && (local->mds_xattr)) { - dht_dir_set_heal_xattr (this, local, - local->xattr, - local->mds_xattr, NULL, - NULL); - dict_unref (local->mds_xattr); - local->mds_xattr = NULL; - local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal (this, local); - 
if (ret) - gf_msg (this->name, GF_LOG_ERROR, - ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, - "xattr heal failed for directory %s " - " gfid %s ", local->loc.path, - gfid); - } else { - /* Call function to save hashed subvol on inode - ctx if internal mds xattr is not present and - all subvols are up - */ - if (inode && !__is_root_gfid (inode->gfid) && - (!local->op_ret)) - (void) dht_common_mark_mdsxattr (frame, NULL, 1); - } - } - if (local->need_selfheal) { - local->need_selfheal = 0; - if (!__is_root_gfid (inode->gfid)) { - gf_uuid_copy (local->gfid, local->mds_stbuf.ia_gfid); - local->stbuf.ia_gid = local->mds_stbuf.ia_gid; - local->stbuf.ia_uid = local->mds_stbuf.ia_uid; - local->stbuf.ia_prot = local->mds_stbuf.ia_prot; - } else { - gf_uuid_copy (local->gfid, local->stbuf.ia_gfid); - local->stbuf.ia_gid = local->prebuf.ia_gid; - local->stbuf.ia_uid = local->prebuf.ia_uid; - local->stbuf.ia_prot = local->prebuf.ia_prot; - } - - copy = create_frame (this, this->ctx->pool); - if (copy) { - copy_local = dht_local_init (copy, &local->loc, - NULL, 0); - if (!copy_local) { - DHT_STACK_DESTROY (copy); - goto cont; - } - copy_local->stbuf = local->stbuf; - copy_local->mds_stbuf = local->mds_stbuf; - copy_local->mds_subvol = local->mds_subvol; - copy->local = copy_local; - FRAME_SU_DO (copy, dht_local_t); - ret = synctask_new (this->ctx->env, - dht_dir_attr_heal, - dht_dir_attr_heal_done, - copy, copy); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_DIR_ATTR_HEAL_FAILED, - "Synctask creation failed to heal attr " - "for path %s gfid %s ", - local->loc.path, local->gfid); - DHT_STACK_DESTROY (copy); - } - } - } -cont: - if (local->layout_mismatch) { - /* Found layout mismatch in the directory, need to - fix this in the inode context */ - dht_layout_unref (this, local->layout); - local->layout = NULL; - dht_lookup_directory (frame, this, &local->loc); - return 0; - } + if (inode && !__is_root_gfid(inode->gfid) && (!local->op_ret)) + (void)dht_common_mark_mdsxattr(frame, NULL, 1); + } + } + if (local->need_selfheal) { + local->need_selfheal = 0; + if (!__is_root_gfid(inode->gfid)) { + gf_uuid_copy(local->gfid, local->mds_stbuf.ia_gfid); + local->stbuf.ia_gid = local->mds_stbuf.ia_gid; + local->stbuf.ia_uid = local->mds_stbuf.ia_uid; + local->stbuf.ia_prot = local->mds_stbuf.ia_prot; + } else { + gf_uuid_copy(local->gfid, local->stbuf.ia_gfid); + local->stbuf.ia_gid = local->prebuf.ia_gid; + local->stbuf.ia_uid = local->prebuf.ia_uid; + local->stbuf.ia_prot = local->prebuf.ia_prot; + } - if (local->need_lookup_everywhere) { - /* As the current layout gave ENOENT error, we would - need a new layout */ - dht_layout_unref (this, local->layout); - local->layout = NULL; - - /* We know that current cached subvol is no more - valid, get the new one */ - local->cached_subvol = NULL; - dht_lookup_everywhere (frame, this, &local->loc); - return 0; - } - if (local->return_estale) { - local->op_ret = -1; - local->op_errno = ESTALE; + copy = create_frame(this, this->ctx->pool); + if (copy) { + copy_local = dht_local_init(copy, &local->loc, NULL, 0); + if (!copy_local) { + DHT_STACK_DESTROY(copy); + goto cont; + } + copy_local->stbuf = local->stbuf; + copy_local->mds_stbuf = local->mds_stbuf; + copy_local->mds_subvol = local->mds_subvol; + copy->local = copy_local; + FRAME_SU_DO(copy, dht_local_t); + ret = synctask_new(this->ctx->env, dht_dir_attr_heal, + dht_dir_attr_heal_done, copy, copy); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_DIR_ATTR_HEAL_FAILED, + "Synctask creation 
failed to heal attr " + "for path %s gfid %s ", + local->loc.path, local->gfid); + DHT_STACK_DESTROY(copy); } + } + } + cont: + if (local->layout_mismatch) { + /* Found layout mismatch in the directory, need to + fix this in the inode context */ + dht_layout_unref(this, local->layout); + local->layout = NULL; + dht_lookup_directory(frame, this, &local->loc); + return 0; + } - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } + if (local->need_lookup_everywhere) { + /* As the current layout gave ENOENT error, we would + need a new layout */ + dht_layout_unref(this, local->layout); + local->layout = NULL; - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - dht_set_fixed_dir_stat (&local->postparent); - - /* local->stbuf is updated only from subvols which have a layout - * The reason is to avoid choosing attr heal source from newly - * added bricks. In case e.g we have only one subvol and for - * some reason layout is not present on it, then local->stbuf - * will be EINVAL. This is an indication that the subvols - * active in the cluster do not have layouts on disk. - * Unwind with ESTALE to trigger a fresh lookup */ - if (is_dir && local->stbuf.ia_type == IA_INVAL) { - local->op_ret = -1; - local->op_errno = ESTALE; - } - /* Delete mds xattr at the time of STACK UNWIND */ - if (local->xattr) - GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + /* We know that current cached subvol is no more + valid, get the new one */ + local->cached_subvol = NULL; + dht_lookup_everywhere(frame, this, &local->loc); + return 0; + } + if (local->return_estale) { + local->op_ret = -1; + local->op_errno = ESTALE; + } + + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); + } + + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xattr, - &local->postparent); + /* local->stbuf is updated only from subvols which have a layout + * The reason is to avoid choosing attr heal source from newly + * added bricks. In case e.g we have only one subvol and for + * some reason layout is not present on it, then local->stbuf + * will be EINVAL. This is an indication that the subvols + * active in the cluster do not have layouts on disk. 
+ * Unwind with ESTALE to trigger a fresh lookup */ + if (is_dir && local->stbuf.ia_type == IA_INVAL) { + local->op_ret = -1; + local->op_errno = ESTALE; } + /* Delete mds xattr at the time of STACK UNWIND */ + if (local->xattr) + GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr); + + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + } err: - return ret; + return ret; } - int -dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - char gfid[GF_UUID_BUF_SIZE] = {0}; + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + char gfid[GF_UUID_BUF_SIZE] = {0}; - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", this->private, out); - - local = frame->local; - cached_subvol = local->cached_subvol; - conf = this->private; + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", this->private, out); - gf_uuid_unparse(local->loc.gfid, gfid); + local = frame->local; + cached_subvol = local->cached_subvol; + conf = this->private; - if (local->locked) - dht_unlock_namespace (frame, &local->lock[0]); + gf_uuid_unparse(local->loc.gfid, gfid); - ret = dht_layout_preset (this, local->cached_subvol, local->loc.inode); - if (ret < 0) { - gf_msg_debug (this->name, EINVAL, - "Failed to set layout for subvolume %s, " - "(gfid = %s)", - cached_subvol ? cached_subvol->name : "", - gfid); - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; - } + if (local->locked) + dht_unlock_namespace(frame, &local->lock[0]); - local->op_ret = 0; - if ((local->stbuf.ia_nlink == 1) - && (conf && conf->unhashed_sticky_bit)) { - local->stbuf.ia_prot.sticky = 1; - } + ret = dht_layout_preset(this, local->cached_subvol, local->loc.inode); + if (ret < 0) { + gf_msg_debug(this->name, EINVAL, + "Failed to set layout for subvolume %s, " + "(gfid = %s)", + cached_subvol ? 
cached_subvol->name : "", gfid); + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - postparent, 1); - } + local->op_ret = 0; + if ((local->stbuf.ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) { + local->stbuf.ia_prot.sticky = 1; + } + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); + } unwind: - gf_msg_debug (this->name, 0, - "creation of linkto on hashed subvol:%s, " - "returned with op_ret %d and op_errno %d: %s", - local->hashed_subvol->name, - op_ret, op_errno, uuid_utoa (local->loc.gfid)); + gf_msg_debug(this->name, 0, + "creation of linkto on hashed subvol:%s, " + "returned with op_ret %d and op_errno %d: %s", + local->hashed_subvol->name, op_ret, op_errno, + uuid_utoa(local->loc.gfid)); - if (local->linked == _gf_true) - dht_linkfile_attr_heal (frame, this); + if (local->linked == _gf_true) + dht_linkfile_attr_heal(frame, this); + dht_set_fixed_dir_stat(&local->postparent); - dht_set_fixed_dir_stat (&local->postparent); - - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xattr, - &local->postparent); + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); out: - return ret; + return ret; } int -dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int this_call_cnt = 0; - dht_local_t *local = NULL; - const char *path = NULL; + int this_call_cnt = 0; + dht_local_t *local = NULL; + const char *path = NULL; - local = (dht_local_t*)frame->local; - path = local->loc.path; - FRAME_SU_UNDO (frame, dht_local_t); + local = (dht_local_t *)frame->local; + path = local->loc.path; + FRAME_SU_UNDO(frame, dht_local_t); - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_UNLINK_LOOKUP_INFO, "lookup_unlink returned with " - "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno, - ((path == NULL)? "null" : path )); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO, + "lookup_unlink returned with " + "op_ret -> %d and op-errno -> %d for %s", + op_ret, op_errno, ((path == NULL) ? 
"null" : path)); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - dht_lookup_everywhere_done (frame, this); - } + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + dht_lookup_everywhere_done(frame, this); + } - return 0; + return 0; } int -dht_lookup_unlink_of_false_linkto_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int this_call_cnt = 0; - dht_local_t *local = NULL; - const char *path = NULL; - - local = (dht_local_t*)frame->local; - path = local->loc.path; + int this_call_cnt = 0; + dht_local_t *local = NULL; + const char *path = NULL; - FRAME_SU_UNDO (frame, dht_local_t); + local = (dht_local_t *)frame->local; + path = local->loc.path; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_UNLINK_LOOKUP_INFO, "lookup_unlink returned with " - "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno, - ((path == NULL)? "null" : path )); + FRAME_SU_UNDO(frame, dht_local_t); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO, + "lookup_unlink returned with " + "op_ret -> %d and op-errno -> %d for %s", + op_ret, op_errno, ((path == NULL) ? "null" : path)); - if (op_ret == 0) { - dht_lookup_everywhere_done (frame, this); - } else { - /*When dht_lookup_everywhere is performed, one cached - *and one hashed file was found and hashed file does - *not point to the above mentioned cached node. So it - *was considered as stale and an unlink was performed. - *But unlink fails. So may be rebalance is in progress. - *now ideally we have two data-files. One obtained during - *lookup_everywhere and one where unlink-failed. So - *at this point in time we cannot decide which one to - *choose because there are chances of first cached - *file is truncated after rebalance and if it is chosen - *as cached node, application will fail. So return EIO.*/ - - if (op_errno == EBUSY) { - - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_UNLINK_FAILED, - "Could not unlink the linkto file as " - "either fd is open and/or linkto xattr " - "is set for %s", - ((path == NULL)? "null":path)); - - } - DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, - NULL, NULL); - - } + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + if (op_ret == 0) { + dht_lookup_everywhere_done(frame, this); + } else { + /*When dht_lookup_everywhere is performed, one cached + *and one hashed file was found and hashed file does + *not point to the above mentioned cached node. So it + *was considered as stale and an unlink was performed. + *But unlink fails. So may be rebalance is in progress. + *now ideally we have two data-files. One obtained during + *lookup_everywhere and one where unlink-failed. So + *at this point in time we cannot decide which one to + *choose because there are chances of first cached + *file is truncated after rebalance and if it is chosen + *as cached node, application will fail. So return EIO.*/ + + if (op_errno == EBUSY) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_UNLINK_FAILED, + "Could not unlink the linkto file as " + "either fd is open and/or linkto xattr " + "is set for %s", + ((path == NULL) ? 
"null" : path)); + } + DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); } + } - return 0; + return 0; } int -dht_lookup_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + dht_local_t *local = NULL; + const char *path = NULL; - dht_local_t *local = NULL; - const char *path = NULL; + /* NOTE: + * If stale file unlink fails either there is an open-fd or is not an + * dht-linkto-file then posix_unlink returns EBUSY, which is overwritten + * to ENOENT + */ - /* NOTE: - * If stale file unlink fails either there is an open-fd or is not an - * dht-linkto-file then posix_unlink returns EBUSY, which is overwritten - * to ENOENT - */ + local = frame->local; - local = frame->local; - - if (local && local->loc.path) - path = local->loc.path; + if (local && local->loc.path) + path = local->loc.path; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_UNLINK_LOOKUP_INFO, - "Returned with op_ret %d and " - "op_errno %d for %s", op_ret, op_errno, - ((path==NULL)?"null":path)); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO, + "Returned with op_ret %d and " + "op_errno %d for %s", + op_ret, op_errno, ((path == NULL) ? "null" : path)); - FRAME_SU_UNDO (frame, dht_local_t); - DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL, - NULL); + FRAME_SU_UNDO(frame, dht_local_t); + DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL); - return 0; + return 0; } int -dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict) { - - int ret = 0; - xlator_t *this = NULL; - char *linktoskip_key = NULL; - - this = THIS; - GF_VALIDATE_OR_GOTO ("dht", this, err); +dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict) +{ + int ret = 0; + xlator_t *this = NULL; + char *linktoskip_key = NULL; - if (dht_is_tier_xlator (this)) - linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; - else - linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; + this = THIS; + GF_VALIDATE_OR_GOTO("dht", this, err); - ret = dict_set_int32 (dict, linktoskip_key, 1); + if (dht_is_tier_xlator(this)) + linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; + else + linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; - if (ret) - goto err; + ret = dict_set_int32(dict, linktoskip_key, 1); - ret = dict_set_int32 (dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + if (ret) + goto err; - if (ret) - goto err; + ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + if (ret) + goto err; - return 0; + return 0; err: - return -1; - + return -1; } int32_t -dht_linkfile_create_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - dht_local_t *local = NULL; - int call_cnt = 0, ret = 0; - xlator_t *subvol = NULL; - uuid_t gfid = {0, }; - char gfid_str[GF_UUID_BUF_SIZE] = {0}; - - subvol = cookie; - local = frame->local; +dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + dht_local_t *local = NULL; + int call_cnt = 0, ret = 0; + xlator_t *subvol = NULL; + uuid_t gfid = { + 0, + }; + char gfid_str[GF_UUID_BUF_SIZE] = {0}; + + subvol = cookie; + local = frame->local; 
+ + if (subvol == local->hashed_subvol) { + if ((op_ret == 0) || (op_errno != ENOENT)) + local->dont_create_linkto = _gf_true; + } else { + if (gf_uuid_is_null(local->gfid)) + gf_uuid_copy(gfid, local->loc.gfid); + else + gf_uuid_copy(gfid, local->gfid); + + if ((op_ret == 0) && gf_uuid_compare(gfid, buf->ia_gfid)) { + gf_uuid_unparse(gfid, gfid_str); + gf_msg_debug(this->name, 0, + "gfid (%s) different on cached subvol " + "(%s) and looked up inode (%s), not " + "creating linkto", + uuid_utoa(buf->ia_gfid), subvol->name, gfid_str); + local->dont_create_linkto = _gf_true; + } else if (op_ret == -1) { + local->dont_create_linkto = _gf_true; + } + } + + call_cnt = dht_frame_return(frame); + if (is_last_call(call_cnt)) { + if (local->dont_create_linkto) + goto no_linkto; + else { + gf_msg_debug(this->name, 0, + "Creating linkto file on %s(hash) to " + "%s on %s (gfid = %s)", + local->hashed_subvol->name, local->loc.path, + local->cached_subvol->name, gfid); - if (subvol == local->hashed_subvol) { - if ((op_ret == 0) || (op_errno != ENOENT)) - local->dont_create_linkto = _gf_true; - } else { - if (gf_uuid_is_null (local->gfid)) - gf_uuid_copy (gfid, local->loc.gfid); - else - gf_uuid_copy (gfid, local->gfid); - - if ((op_ret == 0) && gf_uuid_compare (gfid, buf->ia_gfid)) { - gf_uuid_unparse (gfid, gfid_str); - gf_msg_debug (this->name, 0, - "gfid (%s) different on cached subvol " - "(%s) and looked up inode (%s), not " - "creating linkto", - uuid_utoa (buf->ia_gfid), subvol->name, - gfid_str); - local->dont_create_linkto = _gf_true; - } else if (op_ret == -1) { - local->dont_create_linkto = _gf_true; - } - } + ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk, + this, local->cached_subvol, + local->hashed_subvol, &local->loc); - call_cnt = dht_frame_return (frame); - if (is_last_call (call_cnt)) { - if (local->dont_create_linkto) - goto no_linkto; - else { - gf_msg_debug (this->name, 0, - "Creating linkto file on %s(hash) to " - "%s on %s (gfid = %s)", - local->hashed_subvol->name, - local->loc.path, - local->cached_subvol->name, gfid); - - ret = dht_linkfile_create - (frame, dht_lookup_linkfile_create_cbk, - this, local->cached_subvol, - local->hashed_subvol, &local->loc); - - if (ret < 0) - goto no_linkto; - } + if (ret < 0) + goto no_linkto; } + } - return 0; + return 0; no_linkto: - gf_msg_debug (this->name, 0, - "skipped linkto creation (path:%s) (gfid:%s) " - "(hashed-subvol:%s) (cached-subvol:%s)", - local->loc.path, gfid_str, local->hashed_subvol->name, - local->cached_subvol->name); - - dht_lookup_linkfile_create_cbk (frame, NULL, this, 0, 0, - local->loc.inode, &local->stbuf, - &local->preparent, &local->postparent, - local->xattr); - return 0; -} + gf_msg_debug(this->name, 0, + "skipped linkto creation (path:%s) (gfid:%s) " + "(hashed-subvol:%s) (cached-subvol:%s)", + local->loc.path, gfid_str, local->hashed_subvol->name, + local->cached_subvol->name); + dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode, + &local->stbuf, &local->preparent, + &local->postparent, local->xattr); + return 0; +} int32_t -dht_call_lookup_linkfile_create (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - int i = 0; - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int i 
= 0; + xlator_t *subvol = NULL; - local = frame->local; - if (gf_uuid_is_null (local->gfid)) - gf_uuid_unparse (local->loc.gfid, gfid); - else - gf_uuid_unparse (local->gfid, gfid); - - if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "protecting namespace failed, skipping linkto " - "creation (path:%s)(gfid:%s)(hashed-subvol:%s)" - "(cached-subvol:%s)", local->loc.path, gfid, - local->hashed_subvol->name, local->cached_subvol->name); - goto err; - } + local = frame->local; + if (gf_uuid_is_null(local->gfid)) + gf_uuid_unparse(local->loc.gfid, gfid); + else + gf_uuid_unparse(local->gfid, gfid); - local->locked = _gf_true; + if (op_ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "protecting namespace failed, skipping linkto " + "creation (path:%s)(gfid:%s)(hashed-subvol:%s)" + "(cached-subvol:%s)", + local->loc.path, gfid, local->hashed_subvol->name, + local->cached_subvol->name); + goto err; + } + local->locked = _gf_true; - local->call_cnt = 2; + local->call_cnt = 2; - for (i = 0; i < 2; i++) { - subvol = (subvol == NULL) ? local->hashed_subvol - : local->cached_subvol; + for (i = 0; i < 2; i++) { + subvol = (subvol == NULL) ? local->hashed_subvol : local->cached_subvol; - STACK_WIND_COOKIE (frame, dht_linkfile_create_lookup_cbk, - subvol, subvol, subvol->fops->lookup, - &local->loc, NULL); - } + STACK_WIND_COOKIE(frame, dht_linkfile_create_lookup_cbk, subvol, subvol, + subvol->fops->lookup, &local->loc, NULL); + } - return 0; + return 0; err: - dht_lookup_linkfile_create_cbk (frame, NULL, this, 0, 0, - local->loc.inode, - &local->stbuf, &local->preparent, - &local->postparent, local->xattr); - return 0; + dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode, + &local->stbuf, &local->preparent, + &local->postparent, local->xattr); + return 0; } /* Rebalance is performed from cached_node to hashed_node. Initial cached_node @@ -2387,4546 +2235,4265 @@ err: */ int -dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) -{ - int ret = 0; - dht_local_t *local = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *cached_subvol = NULL; - dht_layout_t *layout = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - gf_boolean_t found_non_linkto_on_hashed = _gf_false; +dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + dht_local_t *local = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + dht_layout_t *layout = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + gf_boolean_t found_non_linkto_on_hashed = _gf_false; + + local = frame->local; + hashed_subvol = local->hashed_subvol; + cached_subvol = local->cached_subvol; + + gf_uuid_unparse(local->loc.gfid, gfid); + + if (local->file_count && local->dir_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH, + "path %s (gfid = %s)exists as a file on one " + "subvolume and directory on another. " + "Please fix it manually", + local->loc.path, gfid); + DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); + return 0; + } + + if (local->dir_count) { + dht_lookup_directory(frame, this, &local->loc); + return 0; + } + + gf_msg_debug(this->name, 0, + "STATUS: hashed_subvol %s " + "cached_subvol %s", + (hashed_subvol == NULL) ? "null" : hashed_subvol->name, + (cached_subvol == NULL) ? 
"null" : cached_subvol->name); + + if (!cached_subvol) { + if (local->skip_unlink.handle_valid_link && hashed_subvol) { + /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK": + * If this lookup is performed by rebalance and this + * rebalance process detected hashed file and by + * the time it sends the lookup request to cached node, + * file got migrated and now at initial hashed_node, + * final migrated file is present. With current logic, + * because this process fails to find the cached_node, + * it will unlink the file at initial hashed_node. + * + * So we avoid this by setting key, and checking at the + * posix_unlink that unlink the file only if file is a + * linkto file and not a migrated_file. + */ + + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file( + local->xattr_req); + + if (ret) { + /* If for some reason, setting key in the dict + * fails, return with ENOENT, as with respect to + * this process, it detected only a stale link + * file. + * + * Next lookup will delete it. + * + * Performing deletion of stale link file when + * setting key in dict fails, may cause the data + * loss because of the above mentioned race. + */ - local = frame->local; - hashed_subvol = local->hashed_subvol; - cached_subvol = local->cached_subvol; + DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, + NULL); + } else { + local->skip_unlink.handle_valid_link = _gf_false; - gf_uuid_unparse (local->loc.gfid, gfid); - - if (local->file_count && local->dir_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_FILE_TYPE_MISMATCH, - "path %s (gfid = %s)exists as a file on one " - "subvolume and directory on another. " - "Please fix it manually", - local->loc.path, gfid); - DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL, - NULL); - return 0; - } + gf_msg_debug(this->name, 0, + "No Cached was found and " + "unlink on hashed was skipped" + " so performing now: %s", + local->loc.path); + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND(frame, dht_lookup_unlink_stale_linkto_cbk, + hashed_subvol, hashed_subvol->fops->unlink, + &local->loc, 0, local->xattr_req); + } - if (local->dir_count) { - dht_lookup_directory (frame, this, &local->loc); - return 0; + } else { + gf_msg_debug(this->name, 0, + "There was no cached file and " + "unlink on hashed is not skipped %s", + local->loc.path); + + DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL); } + return 0; + } - gf_msg_debug (this->name, 0, "STATUS: hashed_subvol %s " - "cached_subvol %s", - (hashed_subvol == NULL)?"null":hashed_subvol->name, - (cached_subvol == NULL)?"null":cached_subvol->name); + /* At the time of dht_lookup, no file was found on hashed and that is + * why dht_lookup_everywhere is called, but by the time + * dht_lookup_everywhere + * reached to server, file might have already migrated. In that case we + * will find a migrated file at the hashed_node. In this case store the + * layout in context and return successfully. + */ - if (!cached_subvol) { + if (hashed_subvol || local->need_lookup_everywhere) { + if (local->need_lookup_everywhere) { + found_non_linkto_on_hashed = _gf_true; - if (local->skip_unlink.handle_valid_link && hashed_subvol) { - - /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK": - * If this lookup is performed by rebalance and this - * rebalance process detected hashed file and by - * the time it sends the lookup request to cached node, - * file got migrated and now at initial hashed_node, - * final migrated file is present. 
With current logic, - * because this process fails to find the cached_node, - * it will unlink the file at initial hashed_node. - * - * So we avoid this by setting key, and checking at the - * posix_unlink that unlink the file only if file is a - * linkto file and not a migrated_file. - */ - - - ret = dht_fill_dict_to_avoid_unlink_of_migrating_file - (local->xattr_req); - - if (ret) { - /* If for some reason, setting key in the dict - * fails, return with ENOENT, as with respect to - * this process, it detected only a stale link - * file. - * - * Next lookup will delete it. - * - * Performing deletion of stale link file when - * setting key in dict fails, may cause the data - * loss because of the above mentioned race. - */ - - - DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, - NULL, NULL, NULL, NULL); - } else { - local->skip_unlink.handle_valid_link = _gf_false; - - gf_msg_debug (this->name, 0, - "No Cached was found and " - "unlink on hashed was skipped" - " so performing now: %s", - local->loc.path); - FRAME_SU_DO (frame, dht_local_t); - STACK_WIND (frame, - dht_lookup_unlink_stale_linkto_cbk, - hashed_subvol, - hashed_subvol->fops->unlink, - &local->loc, 0, local->xattr_req); - } - - } else { - - gf_msg_debug (this->name, 0, - "There was no cached file and " - "unlink on hashed is not skipped %s", - local->loc.path); - - DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, - NULL, NULL, NULL, NULL); - } - return 0; + } else if ((local->file_count == 1) && + (hashed_subvol == cached_subvol)) { + gf_msg_debug(this->name, 0, + "found cached file on hashed subvolume " + "so store in context and return for %s", + local->loc.path); + + found_non_linkto_on_hashed = _gf_true; } - /* At the time of dht_lookup, no file was found on hashed and that is - * why dht_lookup_everywhere is called, but by the time - * dht_lookup_everywhere - * reached to server, file might have already migrated. In that case we - * will find a migrated file at the hashed_node. In this case store the - * layout in context and return successfully. - */ + if (found_non_linkto_on_hashed) + goto preset_layout; + } - if (hashed_subvol || local->need_lookup_everywhere) { + if (hashed_subvol) { + if (local->skip_unlink.handle_valid_link == _gf_true) { + if (cached_subvol == local->skip_unlink.hash_links_to) { + if (gf_uuid_compare(local->skip_unlink.cached_gfid, + local->skip_unlink.hashed_gfid)) { + /*GFID different, return error*/ + DHT_STACK_UNWIND(lookup, frame, -1, ESTALE, NULL, NULL, + NULL, NULL); - if (local->need_lookup_everywhere) { + return 0; + } - found_non_linkto_on_hashed = _gf_true; + ret = dht_layout_preset(this, cached_subvol, local->loc.inode); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, + DHT_MSG_LAYOUT_PRESET_FAILED, + "Could not set pre-set layout " + "for subvolume %s", + cached_subvol->name); + } - } else if ((local->file_count == 1) && - (hashed_subvol == cached_subvol)) { + local->op_ret = (ret == 0) ? ret : -1; + local->op_errno = (ret == 0) ? 
ret : EINVAL; - gf_msg_debug (this->name, 0, - "found cached file on hashed subvolume " - "so store in context and return for %s", - local->loc.path); + /* Presence of local->cached_subvol validates + * that lookup from cached node is successful + */ - found_non_linkto_on_hashed = _gf_true; + if (!local->op_ret && local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); } - if (found_non_linkto_on_hashed) - goto preset_layout; - - } - + gf_msg_debug(this->name, 0, + "Skipped unlinking linkto file " + "on the hashed subvolume. " + "Returning success as it is a " + "valid linkto file. Path:%s", + local->loc.path); - if (hashed_subvol) { - if (local->skip_unlink.handle_valid_link == _gf_true) { - if (cached_subvol == local->skip_unlink.hash_links_to) { + goto unwind_hashed_and_cached; + } else { + local->skip_unlink.handle_valid_link = _gf_false; - if (gf_uuid_compare (local->skip_unlink.cached_gfid, - local->skip_unlink.hashed_gfid)){ + gf_msg_debug(this->name, 0, + "Linkto file found on hashed " + "subvol " + "and data file found on cached " + "subvolume. But linkto points to " + "different cached subvolume (%s) " + "path %s", + (local->skip_unlink.hash_links_to + ? local->skip_unlink.hash_links_to->name + : " "), + local->loc.path); + + if (local->skip_unlink.opend_fd_count == 0) { + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file( + local->xattr_req); + + if (ret) { + DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, + NULL, NULL); + } else { + local->call_cnt = 1; + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND(frame, dht_lookup_unlink_of_false_linkto_cbk, + hashed_subvol, hashed_subvol->fops->unlink, + &local->loc, 0, local->xattr_req); + } + + return 0; + } + } + } + } - /*GFID different, return error*/ - DHT_STACK_UNWIND (lookup, frame, -1, - ESTALE, NULL, NULL, - NULL, NULL); +preset_layout: - return 0; - } + if (found_non_linkto_on_hashed) { + if (local->need_lookup_everywhere) { + if (gf_uuid_compare(local->gfid, local->inode->gfid)) { + /* GFID different, return error */ + DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, + NULL); + return 0; + } + } - ret = dht_layout_preset (this, cached_subvol, - local->loc.inode); - if (ret) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LAYOUT_PRESET_FAILED, - "Could not set pre-set layout " - "for subvolume %s", - cached_subvol->name); - } + local->op_ret = 0; + local->op_errno = 0; + layout = dht_layout_for_subvol(this, cached_subvol); + if (!layout) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "%s: no pre-set layout for subvolume %s," + " gfid = %s", + local->loc.path, + (cached_subvol ? cached_subvol->name : ""), gfid); + } - local->op_ret = (ret == 0) ? ret : -1; - local->op_errno = (ret == 0) ? ret : EINVAL; + ret = dht_layout_set(this, local->inode, layout); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "%s: failed to set layout for subvol %s, " + "gfid = %s", + local->loc.path, + (cached_subvol ? 
cached_subvol->name : ""), gfid); + } - /* Presence of local->cached_subvol validates - * that lookup from cached node is successful - */ + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); + } - if (!local->op_ret && local->loc.parent) { - dht_inode_ctx_time_update - (local->loc.parent, this, - &local->postparent, 1); - } + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + return 0; + } - gf_msg_debug (this->name, 0, - "Skipped unlinking linkto file " - "on the hashed subvolume. " - "Returning success as it is a " - "valid linkto file. Path:%s" - ,local->loc.path); + if (!hashed_subvol) { + gf_msg_debug(this->name, 0, + "Cannot create linkfile for %s on %s: " + "hashed subvolume cannot be found, gfid = %s.", + local->loc.path, cached_subvol->name, gfid); - goto unwind_hashed_and_cached; - } else { + local->op_ret = 0; + local->op_errno = 0; - local->skip_unlink.handle_valid_link = _gf_false; + ret = dht_layout_preset(frame->this, cached_subvol, local->inode); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED, + "Failed to set layout for subvol %s" + ", gfid = %s", + cached_subvol ? cached_subvol->name : "", gfid); + local->op_ret = -1; + local->op_errno = EINVAL; + } - gf_msg_debug (this->name, 0, - "Linkto file found on hashed " - "subvol " - "and data file found on cached " - "subvolume. But linkto points to " - "different cached subvolume (%s) " - "path %s", - (local->skip_unlink.hash_links_to ? - local->skip_unlink.hash_links_to->name : - " "), local->loc.path); + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); + } - if (local->skip_unlink.opend_fd_count == 0) { + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + return 0; + } + if (frame->root->op != GF_FOP_RENAME) { + local->current = &local->lock[0]; + ret = dht_protect_namespace(frame, &local->loc, hashed_subvol, + &local->current->ns, + dht_call_lookup_linkfile_create); + } else { + gf_msg_debug(this->name, 0, + "Creating linkto file on %s(hash) to %s on %s " + "(gfid = %s)", + hashed_subvol->name, local->loc.path, cached_subvol->name, + gfid); + + ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk, this, + cached_subvol, hashed_subvol, &local->loc); + } + + return ret; - ret = dht_fill_dict_to_avoid_unlink_of_migrating_file - (local->xattr_req); +unwind_hashed_and_cached: + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + return 0; +} + +int +dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xattr, + struct iatt *postparent) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + int is_linkfile = 0; + int is_dir = 0; + loc_t *loc = NULL; + xlator_t *link_subvol = NULL; + int ret = -1; + int32_t fd_count = 0; + dht_conf_t *conf = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + dict_t *dict_req = {0}; + + GF_VALIDATE_OR_GOTO("dht", frame, 
out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + GF_VALIDATE_OR_GOTO("dht", this->private, out); + + local = frame->local; + loc = &local->loc; + conf = this->private; + + prev = cookie; + + gf_msg_debug(this->name, 0, + "returned with op_ret %d and op_errno %d (%s) " + "from subvol %s", + op_ret, op_errno, loc->path, prev->name); + + LOCK(&frame->lock); + { + if (op_ret == -1) { + if (op_errno != ENOENT) + local->op_errno = op_errno; + goto unlock; + } + if (gf_uuid_is_null(local->gfid)) + gf_uuid_copy(local->gfid, buf->ia_gfid); - if (ret) { - DHT_STACK_UNWIND (lookup, frame, -1, - EIO, NULL, NULL, - NULL, NULL); - } else { - local->call_cnt = 1; - FRAME_SU_DO (frame, dht_local_t); - STACK_WIND (frame, - dht_lookup_unlink_of_false_linkto_cbk, - hashed_subvol, - hashed_subvol->fops->unlink, - &local->loc, 0, - local->xattr_req); - } + gf_uuid_unparse(local->gfid, gfid); - return 0; + if (gf_uuid_compare(local->gfid, buf->ia_gfid)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH, + "%s: gfid differs on subvolume %s," + " gfid local = %s, gfid node = %s", + loc->path, prev->name, gfid, uuid_utoa(buf->ia_gfid)); + } - } - } + is_linkfile = check_is_linkfile(inode, buf, xattr, + conf->link_xattr_name); - } + if (is_linkfile) { + link_subvol = dht_linkfile_subvol(this, inode, buf, xattr); + gf_msg_debug(this->name, 0, "found on %s linkfile %s (-> %s)", + prev->name, loc->path, + link_subvol ? link_subvol->name : "''"); + goto unlock; } + is_dir = check_is_dir(inode, buf, xattr); -preset_layout: + /* non linkfile GFID takes precedence but don't overwrite + gfid if we have already found a cached file*/ + if (!local->cached_subvol) + gf_uuid_copy(local->gfid, buf->ia_gfid); - if (found_non_linkto_on_hashed) { + if (is_dir) { + local->dir_count++; - if (local->need_lookup_everywhere) { - if (gf_uuid_compare (local->gfid, local->inode->gfid)) { - /* GFID different, return error */ - DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, - NULL, NULL, NULL, NULL); - return 0; - } - } + gf_msg_debug(this->name, 0, "found on %s directory %s", prev->name, + loc->path); + } else { + local->file_count++; - local->op_ret = 0; - local->op_errno = 0; - layout = dht_layout_for_subvol (this, cached_subvol); - if (!layout) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, - "%s: no pre-set layout for subvolume %s," - " gfid = %s", - local->loc.path, (cached_subvol ? - cached_subvol->name : - ""), gfid); - } + gf_msg_debug(this->name, 0, "found cached file on %s for %s", + prev->name, loc->path); - ret = dht_layout_set (this, local->inode, layout); - if (ret < 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, - "%s: failed to set layout for subvol %s, " - "gfid = %s", - local->loc.path, (cached_subvol ? 
- cached_subvol->name : - ""), gfid); - } + if (!local->cached_subvol) { + /* found one file */ + dht_iatt_merge(this, &local->stbuf, buf); - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } + local->xattr = dict_ref(xattr); + local->cached_subvol = prev; - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - dht_set_fixed_dir_stat (&local->postparent); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, - local->op_errno, local->inode, - &local->stbuf, local->xattr, - &local->postparent); - return 0; + gf_msg_debug(this->name, 0, + "storing cached on %s file" + " %s", + prev->name, loc->path); + + dht_iatt_merge(this, &local->postparent, postparent); + + gf_uuid_copy(local->skip_unlink.cached_gfid, buf->ia_gfid); + } else { + /* This is where we need 'rename' both entries logic */ + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_FILE_ON_MULT_SUBVOL, + "multiple subvolumes (%s and %s) have " + "file %s (preferably rename the file " + "in the backend,and do a fresh lookup)", + local->cached_subvol->name, prev->name, local->loc.path); + } } + } +unlock: + UNLOCK(&frame->lock); - if (!hashed_subvol) { + if (is_linkfile) { + ret = dict_get_int32(xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count); - gf_msg_debug (this->name, 0, - "Cannot create linkfile for %s on %s: " - "hashed subvolume cannot be found, gfid = %s.", - local->loc.path, cached_subvol->name, gfid); + /* Any linkto file found on the non-hashed subvolume should + * be unlinked (performed in the "else if" block below) + * + * But if a linkto file is found on hashed subvolume, it may be + * pointing to valid cached node. So unlinking of linkto + * file on hashed subvolume is skipped and inside + * dht_lookup_everywhere_done, checks are performed. If this + * linkto file is found as stale linkto file, it is deleted + * otherwise unlink is skipped. + */ - local->op_ret = 0; - local->op_errno = 0; + if (local->hashed_subvol && local->hashed_subvol == prev) { + local->skip_unlink.handle_valid_link = _gf_true; + local->skip_unlink.opend_fd_count = fd_count; + local->skip_unlink.hash_links_to = link_subvol; + gf_uuid_copy(local->skip_unlink.hashed_gfid, buf->ia_gfid); + + gf_msg_debug(this->name, 0, + "Found" + " one linkto file on hashed subvol %s " + "for %s: Skipping unlinking till " + "everywhere_done", + prev->name, loc->path); + + } else if (!ret && (fd_count == 0)) { + dict_req = dict_new(); + + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_req); + + if (ret) { + /* Skip unlinking for dict_failure + *File is found as a linkto file on non-hashed, + *subvolume. In the current implementation, + *finding a linkto-file on non-hashed does not + *always implies that it is stale. So deletion + *of file should be done only when both fd is + *closed and linkto-xattr is set. In case of + *dict_set failure, avoid skipping of file. + *NOTE: dht_frame_return should get called for + * this block. + */ - ret = dht_layout_preset (frame->this, cached_subvol, - local->inode); - if (ret < 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LAYOUT_PRESET_FAILED, - "Failed to set layout for subvol %s" - ", gfid = %s", - cached_subvol ? cached_subvol->name : - "", gfid); - local->op_ret = -1; - local->op_errno = EINVAL; - } + dict_unref(dict_req); + + } else { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "attempting deletion of stale linkfile " + "%s on %s (hashed subvol is %s)", + loc->path, prev->name, + (local->hashed_subvol ? 
local->hashed_subvol->name + : "")); + /* * + * These stale files may be created using root + * user. Hence deletion will work only with + * root. + */ + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND(frame, dht_lookup_unlink_cbk, prev, + prev->fops->unlink, loc, 0, dict_req); - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } + dict_unref(dict_req); - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - dht_set_fixed_dir_stat (&local->postparent); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, - local->op_errno, local->inode, - &local->stbuf, local->xattr, - &local->postparent); return 0; + } } + } - if (frame->root->op != GF_FOP_RENAME) { - local->current = &local->lock[0]; - ret = dht_protect_namespace (frame, &local->loc, hashed_subvol, - &local->current->ns, - dht_call_lookup_linkfile_create); - } else { - gf_msg_debug (this->name, 0, - "Creating linkto file on %s(hash) to %s on %s " - "(gfid = %s)", - hashed_subvol->name, local->loc.path, - cached_subvol->name, gfid); - - ret = dht_linkfile_create (frame, - dht_lookup_linkfile_create_cbk, this, - cached_subvol, hashed_subvol, - &local->loc); - } - - return ret; + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + dht_lookup_everywhere_done(frame, this); + } -unwind_hashed_and_cached: - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - dht_set_fixed_dir_stat (&local->postparent); - DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xattr, - &local->postparent); - return 0; +out: + return ret; } int -dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xattr, - struct iatt *postparent) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - int is_linkfile = 0; - int is_dir = 0; - loc_t *loc = NULL; - xlator_t *link_subvol = NULL; - int ret = -1; - int32_t fd_count = 0; - dht_conf_t *conf = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - dict_t *dict_req = {0}; - - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - GF_VALIDATE_OR_GOTO ("dht", this->private, out); - - local = frame->local; - loc = &local->loc; - conf = this->private; - - prev = cookie; - - gf_msg_debug (this->name, 0, - "returned with op_ret %d and op_errno %d (%s) " - "from subvol %s", op_ret, op_errno, loc->path, - prev->name); - - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno != ENOENT) - local->op_errno = op_errno; - goto unlock; - } - - if (gf_uuid_is_null (local->gfid)) - gf_uuid_copy (local->gfid, buf->ia_gfid); - - gf_uuid_unparse(local->gfid, gfid); - - if (gf_uuid_compare (local->gfid, buf->ia_gfid)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_GFID_MISMATCH, - "%s: gfid differs on subvolume %s," - " gfid local = %s, gfid node = %s", - loc->path, prev->name, gfid, - uuid_utoa(buf->ia_gfid)); - } +dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int i = 0; + int call_cnt = 0; - is_linkfile = check_is_linkfile (inode, buf, xattr, - conf->link_xattr_name); - - if (is_linkfile) { - link_subvol = dht_linkfile_subvol (this, inode, buf, - xattr); - gf_msg_debug (this->name, 0, - "found on %s linkfile %s (-> %s)", - prev->name, loc->path, - link_subvol ? 
link_subvol->name : "''"); - goto unlock; - } + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", this->private, out); + GF_VALIDATE_OR_GOTO("dht", loc, out); - is_dir = check_is_dir (inode, buf, xattr); + conf = this->private; + local = frame->local; - /* non linkfile GFID takes precedence but don't overwrite - gfid if we have already found a cached file*/ - if (!local->cached_subvol) - gf_uuid_copy (local->gfid, buf->ia_gfid); + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - if (is_dir) { - local->dir_count++; + if (!local->inode) + local->inode = inode_ref(loc->inode); - gf_msg_debug (this->name, 0, - "found on %s directory %s", - prev->name, loc->path); - } else { - local->file_count++; - - gf_msg_debug (this->name, 0, - "found cached file on %s for %s", - prev->name, loc->path); - - if (!local->cached_subvol) { - /* found one file */ - dht_iatt_merge (this, &local->stbuf, buf); - - local->xattr = dict_ref (xattr); - local->cached_subvol = prev; - - gf_msg_debug (this->name, 0, - "storing cached on %s file" - " %s", prev->name, loc->path); - - dht_iatt_merge (this, &local->postparent, - postparent); - - gf_uuid_copy (local->skip_unlink.cached_gfid, - buf->ia_gfid); - } else { - /* This is where we need 'rename' both entries logic */ - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_FILE_ON_MULT_SUBVOL, - "multiple subvolumes (%s and %s) have " - "file %s (preferably rename the file " - "in the backend,and do a fresh lookup)", - local->cached_subvol->name, - prev->name, local->loc.path); - } - } - } -unlock: - UNLOCK (&frame->lock); + gf_msg_debug(this->name, 0, "winding lookup call to %d subvols", call_cnt); - if (is_linkfile) { - ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count); + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_lookup_everywhere_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, loc, + local->xattr_req); + } - /* Any linkto file found on the non-hashed subvolume should - * be unlinked (performed in the "else if" block below) - * - * But if a linkto file is found on hashed subvolume, it may be - * pointing to valid cached node. So unlinking of linkto - * file on hashed subvolume is skipped and inside - * dht_lookup_everywhere_done, checks are performed. If this - * linkto file is found as stale linkto file, it is deleted - * otherwise unlink is skipped. 
- */ + return 0; +out: + DHT_STACK_UNWIND(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); +err: + return -1; +} + +int +dht_lookup_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) +{ + xlator_t *prev = NULL; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + loc_t *loc = NULL; + dht_conf_t *conf = NULL; + int ret = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, unwind); + GF_VALIDATE_OR_GOTO("dht", frame->local, unwind); + GF_VALIDATE_OR_GOTO("dht", this->private, unwind); + GF_VALIDATE_OR_GOTO("dht", cookie, unwind); + + prev = cookie; + subvol = prev; + conf = this->private; + local = frame->local; + loc = &local->loc; + + gf_uuid_unparse(loc->gfid, gfid); + + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_LINK_FILE_LOOKUP_INFO, + "Lookup of %s on %s (following linkfile) failed " + ",gfid = %s", + local->loc.path, subvol->name, gfid); + + /* If cached subvol returned ENOTCONN, do not do + lookup_everywhere. We need to make sure linkfile does not get + removed, which can take away the namespace, and subvol is + anyways down. */ + + local->cached_subvol = NULL; + if (op_errno != ENOTCONN) + goto err; + else + goto unwind; + } + + if (check_is_dir(inode, stbuf, xattr)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO, + "Lookup of %s on %s (following linkfile) reached dir," + " gfid = %s", + local->loc.path, subvol->name, gfid); + goto err; + } + + if (check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO, + "lookup of %s on %s (following linkfile) reached link," + "gfid = %s", + local->loc.path, subvol->name, gfid); + goto err; + } + + if (gf_uuid_compare(local->gfid, stbuf->ia_gfid)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH, + "%s: gfid different on data file on %s," + " gfid local = %s, gfid node = %s ", + local->loc.path, subvol->name, gfid, uuid_utoa(stbuf->ia_gfid)); + goto err; + } + + if ((stbuf->ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) { + stbuf->ia_prot.sticky = 1; + } + + ret = dht_layout_preset(this, prev, inode); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED, + "Failed to set layout for subvolume %s," + "gfid = %s", + prev->name, gfid); + op_ret = -1; + op_errno = EINVAL; + } + + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); + } - if (local->hashed_subvol && local->hashed_subvol == prev) { - - local->skip_unlink.handle_valid_link = _gf_true; - local->skip_unlink.opend_fd_count = fd_count; - local->skip_unlink.hash_links_to = link_subvol; - gf_uuid_copy (local->skip_unlink.hashed_gfid, - buf->ia_gfid); - - gf_msg_debug (this->name, 0, "Found" - " one linkto file on hashed subvol %s " - "for %s: Skipping unlinking till " - "everywhere_done", prev->name, - loc->path); - - } else if (!ret && (fd_count == 0)) { - - dict_req = dict_new (); - - ret = dht_fill_dict_to_avoid_unlink_of_migrating_file - (dict_req); - - if (ret) { - - /* Skip unlinking for dict_failure - *File is found as a linkto file on non-hashed, - *subvolume. In the current implementation, - *finding a linkto-file on non-hashed does not - *always implies that it is stale. So deletion - *of file should be done only when both fd is - *closed and linkto-xattr is set. 
In case of - *dict_set failure, avoid skipping of file. - *NOTE: dht_frame_return should get called for - * this block. - */ - - dict_unref (dict_req); - - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, - "attempting deletion of stale linkfile " - "%s on %s (hashed subvol is %s)", - loc->path, prev->name, - (local->hashed_subvol? - local->hashed_subvol->name : "")); - /* * - * These stale files may be created using root - * user. Hence deletion will work only with - * root. - */ - FRAME_SU_DO (frame, dht_local_t); - STACK_WIND (frame, dht_lookup_unlink_cbk, - prev, prev->fops->unlink, loc, - 0, dict_req); - - dict_unref (dict_req); - - return 0; - } - } - } +unwind: + DHT_STRIP_PHASE1_FLAGS(stbuf); + dht_set_fixed_dir_stat(postparent); + DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr, + postparent); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - dht_lookup_everywhere_done (frame, this); - } + return 0; +err: + dht_lookup_everywhere(frame, this, loc); out: - return ret; + return 0; } - int -dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc) +dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc) { - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int i = 0; - int call_cnt = 0; + int call_cnt = 0; + int i = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int ret = 0; - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", this->private, out); - GF_VALIDATE_OR_GOTO ("dht", loc, out); - - conf = this->private; - local = frame->local; + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, unwind); + GF_VALIDATE_OR_GOTO("dht", frame->local, unwind); + GF_VALIDATE_OR_GOTO("dht", this->private, unwind); + GF_VALIDATE_OR_GOTO("dht", loc, unwind); - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; + conf = this->private; + local = frame->local; - if (!local->inode) - local->inode = inode_ref (loc->inode); + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - gf_msg_debug (this->name, 0, - "winding lookup call to %d subvols", call_cnt); + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + goto unwind; + } - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_lookup_everywhere_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - loc, local->xattr_req); - } + if (local->xattr != NULL) { + dict_unref(local->xattr); + local->xattr = NULL; + } - return 0; + if (!gf_uuid_is_null(local->gfid)) { + ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value:" + " key = gfid-req", + local->loc.path); + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE( + frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req); + } + return 0; +unwind: + DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); out: - DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); -err: - return -1; + return 0; } +/* Code to get hashed subvol based on inode and loc + First it check if loc->parent and loc->path exist then it get + hashed subvol based on loc. 
+*/ -int -dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) +xlator_t * +dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc) { - xlator_t *prev = NULL; - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - loc_t *loc = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, unwind); - GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind); - GF_VALIDATE_OR_GOTO ("dht", this->private, unwind); - GF_VALIDATE_OR_GOTO ("dht", cookie, unwind); + char *path = NULL; + loc_t populate_loc = { + 0, + }; + char *name = NULL; + xlator_t *hash_subvol = NULL; - prev = cookie; - subvol = prev; - conf = this->private; - local = frame->local; - loc = &local->loc; - - gf_uuid_unparse(loc->gfid, gfid); + if (!inode) + return hash_subvol; - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_INFO, op_errno, - DHT_MSG_LINK_FILE_LOOKUP_INFO, - "Lookup of %s on %s (following linkfile) failed " - ",gfid = %s", local->loc.path, subvol->name, gfid); - - /* If cached subvol returned ENOTCONN, do not do - lookup_everywhere. We need to make sure linkfile does not get - removed, which can take away the namespace, and subvol is - anyways down. */ - - local->cached_subvol = NULL; - if (op_errno != ENOTCONN) - goto err; - else - goto unwind; + if (loc && loc->parent && loc->path) { + if (!loc->name) { + name = strrchr(loc->path, '/'); + if (name) { + loc->name = name + 1; + } else { + goto out; + } } + hash_subvol = dht_subvol_get_hashed(this, loc); + goto out; + } - if (check_is_dir (inode, stbuf, xattr)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LINK_FILE_LOOKUP_INFO, - "Lookup of %s on %s (following linkfile) reached dir," - " gfid = %s", local->loc.path, subvol->name, gfid); - goto err; - } + if (!gf_uuid_is_null(inode->gfid)) { + populate_loc.inode = inode_ref(inode); + populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL); + inode_path(populate_loc.inode, NULL, &path); - if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LINK_FILE_LOOKUP_INFO, - "lookup of %s on %s (following linkfile) reached link," - "gfid = %s", local->loc.path, subvol->name, gfid); - goto err; - } + if (!path) + goto out; - if (gf_uuid_compare (local->gfid, stbuf->ia_gfid)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_GFID_MISMATCH, - "%s: gfid different on data file on %s," - " gfid local = %s, gfid node = %s ", - local->loc.path, subvol->name, gfid, - uuid_utoa(stbuf->ia_gfid)); - goto err; - } + populate_loc.path = path; + if (!populate_loc.name && populate_loc.path) { + name = strrchr(populate_loc.path, '/'); + if (name) { + populate_loc.name = name + 1; - if ((stbuf->ia_nlink == 1) - && (conf && conf->unhashed_sticky_bit)) { - stbuf->ia_prot.sticky = 1; + } else { + goto out; + } } - - ret = dht_layout_preset (this, prev, inode); - if (ret < 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LAYOUT_PRESET_FAILED, - "Failed to set layout for subvolume %s," - "gfid = %s", prev->name, gfid); - op_ret = -1; - op_errno = EINVAL; + hash_subvol = dht_subvol_get_hashed(this, &populate_loc); + } +out: + if (populate_loc.inode) + loc_wipe(&populate_loc); + return hash_subvol; +} + +int +dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, 
struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) +{ + char is_linkfile = 0; + char is_dir = 0; + xlator_t *subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + xlator_t *prev = NULL; + int ret = 0; + dht_layout_t *parent_layout = NULL; + uint32_t vol_commit_hash = 0; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + GF_VALIDATE_OR_GOTO("dht", this->private, out); + + conf = this->private; + + prev = cookie; + local = frame->local; + loc = &local->loc; + + /* This is required for handling stale linkfile deletion, + * or any more call which happens from this 'loc'. + */ + if (!op_ret && gf_uuid_is_null(local->gfid)) + memcpy(local->gfid, stbuf->ia_gfid, 16); + + gf_msg_debug(this->name, op_errno, + "fresh_lookup returned for %s with op_ret %d", loc->path, + op_ret); + + if (!conf->vch_forced) { + ret = dict_get_uint32(xattr, conf->commithash_xattr_name, + &vol_commit_hash); + if (ret == 0) { + conf->vol_commit_hash = vol_commit_hash; } + } - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - postparent, 1); + if (ENTRY_MISSING(op_ret, op_errno)) { + if (1 == conf->subvolume_cnt) { + /* No need to lookup again */ + goto out; } -unwind: - DHT_STRIP_PHASE1_FLAGS (stbuf); - dht_set_fixed_dir_stat (postparent); - DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr, - postparent); - - return 0; + gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s", loc->path, + prev->name); -err: - dht_lookup_everywhere (frame, this, loc); -out: - return 0; -} + /* lookup-optimize supersedes lookup-unhashed settings, + * - so if it is set, do not process search_unhashed + * - except, in the case of rebalance daemon, we want to + * force the lookup_everywhere behavior */ + if (!conf->defrag && conf->lookup_optimize && loc->parent) { + ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout); + if (ret || !parent_layout || + (parent_layout->commit_hash != conf->vol_commit_hash)) { + gf_msg_debug(this->name, 0, + "hashes don't match (ret - %d," + " parent_layout - %p, parent_hash - %x," + " vol_hash - %x), do global lookup", + ret, parent_layout, + (parent_layout ? 
parent_layout->commit_hash : -1), + conf->vol_commit_hash); + local->op_errno = ENOENT; + dht_lookup_everywhere(frame, this, loc); + return 0; + } + } else { + if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) { + local->op_errno = ENOENT; + dht_lookup_everywhere(frame, this, loc); + return 0; + } + if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) && + (loc->parent)) { + ret = dht_inode_ctx_layout_get(loc->parent, this, + &parent_layout); + if (ret || !parent_layout) + goto out; + if (parent_layout->search_unhashed) { + local->op_errno = ENOENT; + dht_lookup_everywhere(frame, this, loc); + return 0; + } + } + } + } -int -dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - int call_cnt = 0; - int i = 0; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int ret = 0; + if (op_ret == 0) { + is_dir = check_is_dir(inode, stbuf, xattr); + if (is_dir) { + local->inode = inode_ref(inode); + local->xattr = dict_ref(xattr); + } + } - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, unwind); - GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind); - GF_VALIDATE_OR_GOTO ("dht", this->private, unwind); - GF_VALIDATE_OR_GOTO ("dht", loc, unwind); + if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { + dht_lookup_directory(frame, this, &local->loc); + return 0; + } - conf = this->private; - local = frame->local; + if (op_ret == -1) { + gf_msg_debug(this->name, op_errno, + "Lookup of %s for subvolume" + " %s failed", + loc->path, prev->name); + goto out; + } - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; + is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name); - local->layout = dht_layout_new (this, conf->subvolume_cnt); - if (!local->layout) { - goto unwind; - } + if (!is_linkfile) { + /* non-directory and not a linkfile */ - if (local->xattr != NULL) { - dict_unref (local->xattr); - local->xattr = NULL; + ret = dht_layout_preset(this, prev, inode); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED, + "could not set pre-set layout for subvolume %s", prev->name); + op_ret = -1; + op_errno = EINVAL; + goto out; } + goto out; + } - if (!gf_uuid_is_null (local->gfid)) { - ret = dict_set_gfuuid (local->xattr_req, "gfid-req", - local->gfid, true); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = gfid-req", local->loc.path); - } + subvol = dht_linkfile_subvol(this, inode, stbuf, xattr); + if (!subvol) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "linkfile not having link " + "subvol for %s", + loc->path); - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_lookup_dir_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } - return 0; -unwind: - DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); -out: + gf_msg_debug(this->name, 0, + "linkfile not having link subvolume. path=%s", loc->path); + dht_lookup_everywhere(frame, this, loc); return 0; + } -} - -/* Code to get hashed subvol based on inode and loc - First it check if loc->parent and loc->path exist then it get - hashed subvol based on loc. 
-*/ - -xlator_t * -dht_inode_get_hashed_subvol (inode_t *inode, xlator_t *this, loc_t *loc) -{ - char *path = NULL; - loc_t populate_loc = {0, }; - char *name = NULL; - xlator_t *hash_subvol = NULL; - - if (!inode) - return hash_subvol; - - if (loc && loc->parent && loc->path) { - if (!loc->name) { - name = strrchr (loc->path, '/'); - if (name) { - loc->name = name + 1; - } else { - goto out; - } - } - hash_subvol = dht_subvol_get_hashed (this, loc); - goto out; - } - - if (!gf_uuid_is_null (inode->gfid)) { - populate_loc.inode = inode_ref (inode); - populate_loc.parent = inode_parent (populate_loc.inode, - NULL, NULL); - inode_path (populate_loc.inode, NULL, &path); + gf_msg_debug(this->name, 0, + "Calling lookup on linkto target %s for path %s", subvol->name, + loc->path); - if (!path) - goto out; + STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol, + subvol->fops->lookup, &local->loc, local->xattr_req); - populate_loc.path = path; - if (!populate_loc.name && populate_loc.path) { - name = strrchr (populate_loc.path, '/'); - if (name) { - populate_loc.name = name + 1; + return 0; - } else { - goto out; - } - } - hash_subvol = dht_subvol_get_hashed (this, &populate_loc); - } out: - if (populate_loc.inode) - loc_wipe (&populate_loc); - return hash_subvol; + /* + * FIXME: postparent->ia_size and postparent->st_blocks do not have + * correct values. since, postparent corresponds to a directory these + * two members should have values equal to sum of corresponding values + * from each of the subvolume. See dht_iatt_merge for reference. + */ + + if (!op_ret && local && local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); + } + + DHT_STRIP_PHASE1_FLAGS(stbuf); + dht_set_fixed_dir_stat(postparent); + DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr, + postparent); +err: + return 0; } - -int -dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) +/* For directories, check if acl xattrs have been requested (by the acl xlator), + * if not, request for them. 
These xattrs are needed for dht dir self-heal to + * perform proper self-healing of dirs + */ +void +dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req) { - char is_linkfile = 0; - char is_dir = 0; - xlator_t *subvol = NULL; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - loc_t *loc = NULL; - xlator_t *prev = NULL; - int ret = 0; - dht_layout_t *parent_layout = NULL; - uint32_t vol_commit_hash = 0; + int ret = 0; - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - GF_VALIDATE_OR_GOTO ("dht", this->private, out); + GF_ASSERT(xattr_req); - conf = this->private; + if (!dict_get(xattr_req, POSIX_ACL_ACCESS_XATTR)) { + ret = dict_set_int8(xattr_req, POSIX_ACL_ACCESS_XATTR, 0); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s", + POSIX_ACL_ACCESS_XATTR); + } - prev = cookie; - local = frame->local; - loc = &local->loc; + if (!dict_get(xattr_req, POSIX_ACL_DEFAULT_XATTR)) { + ret = dict_set_int8(xattr_req, POSIX_ACL_DEFAULT_XATTR, 0); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s", + POSIX_ACL_DEFAULT_XATTR); + } - /* This is required for handling stale linkfile deletion, - * or any more call which happens from this 'loc'. - */ - if (!op_ret && gf_uuid_is_null (local->gfid)) - memcpy (local->gfid, stbuf->ia_gfid, 16); - - gf_msg_debug (this->name, op_errno, - "fresh_lookup returned for %s with op_ret %d", - loc->path, op_ret); - - if (!conf->vch_forced) { - ret = dict_get_uint32 (xattr, conf->commithash_xattr_name, - &vol_commit_hash); - if (ret == 0) { - conf->vol_commit_hash = vol_commit_hash; - } - } - - if (ENTRY_MISSING (op_ret, op_errno)) { - - if (1 == conf->subvolume_cnt) { - /* No need to lookup again */ - goto out; - } - - gf_msg_debug (this->name, 0, - "Entry %s missing on subvol %s", - loc->path, prev->name); - - /* lookup-optimize supersedes lookup-unhashed settings, - * - so if it is set, do not process search_unhashed - * - except, in the case of rebalance daemon, we want to - * force the lookup_everywhere behavior */ - if (!conf->defrag && conf->lookup_optimize && loc->parent) { - ret = dht_inode_ctx_layout_get (loc->parent, this, - &parent_layout); - if (ret || !parent_layout || - (parent_layout->commit_hash != - conf->vol_commit_hash)) { - gf_msg_debug (this->name, 0, - "hashes don't match (ret - %d," - " parent_layout - %p, parent_hash - %x," - " vol_hash - %x), do global lookup", - ret, parent_layout, - (parent_layout ? 
- parent_layout->commit_hash : -1), - conf->vol_commit_hash); - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, loc); - return 0; - } - } else { - if (conf->search_unhashed == - GF_DHT_LOOKUP_UNHASHED_ON) { - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, loc); - return 0; - } - - if ((conf->search_unhashed == - GF_DHT_LOOKUP_UNHASHED_AUTO) && - (loc->parent)) { - ret = dht_inode_ctx_layout_get (loc->parent, - this, - &parent_layout); - if (ret || !parent_layout) - goto out; - if (parent_layout->search_unhashed) { - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, - loc); - return 0; - } - } - } - } - - if (op_ret == 0) { - is_dir = check_is_dir (inode, stbuf, xattr); - if (is_dir) { - local->inode = inode_ref (inode); - local->xattr = dict_ref (xattr); - } - } - - if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { - dht_lookup_directory (frame, this, &local->loc); - return 0; - } - - if (op_ret == -1) { - gf_msg_debug (this->name, op_errno, - "Lookup of %s for subvolume" - " %s failed", loc->path, - prev->name); - goto out; - } - - is_linkfile = check_is_linkfile (inode, stbuf, xattr, - conf->link_xattr_name); - - if (!is_linkfile) { - /* non-directory and not a linkfile */ - - ret = dht_layout_preset (this, prev, inode); - if (ret < 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LAYOUT_PRESET_FAILED, - "could not set pre-set layout for subvolume %s", - prev->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - goto out; - } - - subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); - if (!subvol) { - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, "linkfile not having link " - "subvol for %s", loc->path); - - gf_msg_debug (this->name, 0, - "linkfile not having link subvolume. path=%s", - loc->path); - dht_lookup_everywhere (frame, this, loc); - return 0; - } - - gf_msg_debug (this->name, 0, - "Calling lookup on linkto target %s for path %s", - subvol->name, loc->path); - - STACK_WIND_COOKIE (frame, dht_lookup_linkfile_cbk, subvol, - subvol, subvol->fops->lookup, - &local->loc, local->xattr_req); - - return 0; - -out: - /* - * FIXME: postparent->ia_size and postparent->st_blocks do not have - * correct values. since, postparent corresponds to a directory these - * two members should have values equal to sum of corresponding values - * from each of the subvolume. See dht_iatt_merge for reference. - */ - - if (!op_ret && local && local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - postparent, 1); - } - - DHT_STRIP_PHASE1_FLAGS (stbuf); - dht_set_fixed_dir_stat (postparent); - DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr, - postparent); -err: - return 0; + return; } -/* For directories, check if acl xattrs have been requested (by the acl xlator), - * if not, request for them. 
These xattrs are needed for dht dir self-heal to - * perform proper self-healing of dirs - */ -void -dht_check_and_set_acl_xattr_req (xlator_t *this, dict_t *xattr_req) -{ - int ret = 0; - - GF_ASSERT (xattr_req); - - if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) { - ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s", - POSIX_ACL_ACCESS_XATTR); - } - - if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) { - ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s", - POSIX_ACL_DEFAULT_XATTR); - } - - return; -} - - /* for directories, we need the following info: * the layout : trusted.glusterfs.dht * the mds information : trusted.glusterfs.dht.mds * the acl info: See above */ int -dht_set_dir_xattr_req (xlator_t *this, loc_t *loc, dict_t *xattr_req) -{ - int ret = -EINVAL; - dht_conf_t *conf = NULL; - - conf = this->private; - if (!conf) { - goto err; - } - - if (!xattr_req) { - goto err; - } - - /* Xattr to get the layout for a directory - */ - ret = dict_set_uint32 (xattr_req, conf->xattr_name, 4 * 4); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", conf->xattr_name, loc->path); - goto err; - } - - /*Non-fatal failure */ - ret = dict_set_uint32 (xattr_req, conf->mds_xattr_key, 4); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", conf->mds_xattr_key, loc->path); - } - - dht_check_and_set_acl_xattr_req (this, xattr_req); - ret = 0; +dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req) +{ + int ret = -EINVAL; + dht_conf_t *conf = NULL; + + conf = this->private; + if (!conf) { + goto err; + } + + if (!xattr_req) { + goto err; + } + + /* Xattr to get the layout for a directory + */ + ret = dict_set_uint32(xattr_req, conf->xattr_name, 4 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + conf->xattr_name, loc->path); + goto err; + } + + /*Non-fatal failure */ + ret = dict_set_uint32(xattr_req, conf->mds_xattr_key, 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + conf->mds_xattr_key, loc->path); + } + + dht_check_and_set_acl_xattr_req(this, xattr_req); + ret = 0; err: - return ret; -} - - -int dht_set_file_xattr_req (xlator_t *this, loc_t *loc, dict_t *xattr_req) -{ - int ret = -EINVAL; - dht_conf_t *conf = NULL; - - conf = this->private; - if (!conf) { - goto err; - } - - if (!xattr_req) { - goto err; - } - - /* Used to check whether this is a linkto file. - */ - ret = dict_set_uint32 (xattr_req, - conf->link_xattr_name, 256); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", conf->link_xattr_name, loc->path); - goto err; - } - - /* This is used to make sure we don't unlink linkto files - * which are the target of an ongoing file migration. 
- */ - ret = dict_set_uint32 (xattr_req, - GLUSTERFS_OPEN_FD_COUNT, 4); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", GLUSTERFS_OPEN_FD_COUNT, loc->path); - goto err; - } - - ret = 0; + return ret; +} + +int +dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req) +{ + int ret = -EINVAL; + dht_conf_t *conf = NULL; + + conf = this->private; + if (!conf) { + goto err; + } + + if (!xattr_req) { + goto err; + } + + /* Used to check whether this is a linkto file. + */ + ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + conf->link_xattr_name, loc->path); + goto err; + } + + /* This is used to make sure we don't unlink linkto files + * which are the target of an ongoing file migration. + */ + ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + GLUSTERFS_OPEN_FD_COUNT, loc->path); + goto err; + } + + ret = 0; err: - return ret; -} - - - -int -dht_do_revalidate (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - xlator_t *subvol = NULL; - xlator_t *mds_subvol = NULL; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - int op_errno = -1; - dht_layout_t *layout = NULL; - int i = 0; - int call_cnt = 0; - int gen = 0; - - conf = this->private; - if (!conf) { - op_errno = EINVAL; - goto err; - } - - local = frame->local; - if (!local) { - op_errno = EINVAL; - goto err; - } - - layout = local->layout; - if (!layout) { - gf_msg_debug (this->name, 0, - "path = %s. No layout found in the inode ctx.", - loc->path); - op_errno = EINVAL; - goto err; - } - - /* Generation number has changed. This layout may be stale. */ - if (layout->gen && (layout->gen < conf->gen)) { - gen = layout->gen; - dht_layout_unref (this, local->layout); - local->layout = NULL; - local->cached_subvol = NULL; - - gf_msg_debug(this->name, 0, - "path = %s. In memory layout may be stale." - "(layout->gen (%d) is less than " - "conf->gen (%d)). Calling fresh lookup.", - loc->path, gen, conf->gen); - - dht_do_fresh_lookup (frame, this, loc); - return 0; - } - - local->inode = inode_ref (loc->inode); - - /* Since we don't know whether this has changed, - * request all xattrs*/ - ret = dht_set_file_xattr_req (this, loc, local->xattr_req); - if (ret) { - op_errno = -ret; - goto err; - } - - ret = dht_set_dir_xattr_req (this, loc, local->xattr_req); - if (ret) { - op_errno = -ret; - goto err; - } - - if (IA_ISDIR (local->inode->ia_type)) { - ret = dht_inode_ctx_mdsvol_get (local->inode, this, - &mds_subvol); - if (ret || !mds_subvol) { - gf_msg_debug (this->name, 0, - "path = %s. 
No mds subvol in inode ctx", - local->loc.path); - } - local->mds_subvol = mds_subvol; - local->call_cnt = conf->subvolume_cnt; - call_cnt = local->call_cnt; - - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_revalidate_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - loc, local->xattr_req); - } - return 0; + return ret; +} + +int +dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + xlator_t *subvol = NULL; + xlator_t *mds_subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + int gen = 0; + + conf = this->private; + if (!conf) { + op_errno = EINVAL; + goto err; + } + + local = frame->local; + if (!local) { + op_errno = EINVAL; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, + "path = %s. No layout found in the inode ctx.", loc->path); + op_errno = EINVAL; + goto err; + } + + /* Generation number has changed. This layout may be stale. */ + if (layout->gen && (layout->gen < conf->gen)) { + gen = layout->gen; + dht_layout_unref(this, local->layout); + local->layout = NULL; + local->cached_subvol = NULL; + + gf_msg_debug(this->name, 0, + "path = %s. In memory layout may be stale." + "(layout->gen (%d) is less than " + "conf->gen (%d)). Calling fresh lookup.", + loc->path, gen, conf->gen); + + dht_do_fresh_lookup(frame, this, loc); + return 0; + } + + local->inode = inode_ref(loc->inode); + + /* Since we don't know whether this has changed, + * request all xattrs*/ + ret = dht_set_file_xattr_req(this, loc, local->xattr_req); + if (ret) { + op_errno = -ret; + goto err; + } + + ret = dht_set_dir_xattr_req(this, loc, local->xattr_req); + if (ret) { + op_errno = -ret; + goto err; + } + + if (IA_ISDIR(local->inode->ia_type)) { + ret = dht_inode_ctx_mdsvol_get(local->inode, this, &mds_subvol); + if (ret || !mds_subvol) { + gf_msg_debug(this->name, 0, "path = %s. No mds subvol in inode ctx", + local->loc.path); } - - /* If not a dir, this should be 1 */ - local->call_cnt = layout->cnt; + local->mds_subvol = mds_subvol; + local->call_cnt = conf->subvolume_cnt; call_cnt = local->call_cnt; for (i = 0; i < call_cnt; i++) { - subvol = layout->list[i].xlator; - - gf_msg_debug (this->name, 0, "path = %s. Calling " - "revalidate lookup on %s", - loc->path, subvol->name); - - STACK_WIND_COOKIE (frame, dht_revalidate_cbk, subvol, - subvol, subvol->fops->lookup, - &local->loc, local->xattr_req); - } - return 0; -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, - NULL); - return 0; -} - -int -dht_do_fresh_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - int ret = -1; - dht_conf_t *conf = NULL; - xlator_t *hashed_subvol = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int call_cnt = 0; - int i = 0; - - conf = this->private; - if (!conf) { - op_errno = EINVAL; - goto err; - } - - local = frame->local; - if (!local) { - op_errno = EINVAL; - goto err; - } - - /* Since we don't know whether this is a file or a directory, - * request all xattrs*/ - ret = dht_set_file_xattr_req (this, loc, local->xattr_req); - if (ret) { - op_errno = -ret; - goto err; - } - - ret = dht_set_dir_xattr_req (this, loc, local->xattr_req); - if (ret) { - op_errno = -ret; - goto err; - } - - /* This should have been set in dht_lookup */ - hashed_subvol = local->hashed_subvol; - - if (!hashed_subvol) { - gf_msg_debug (this->name, 0, - "%s: no subvolume in layout for path, " - "checking on all the subvols to see if " - "it is a directory", loc->path); - - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; - - local->layout = dht_layout_new (this, - conf->subvolume_cnt); - if (!local->layout) { - op_errno = ENOMEM; - goto err; - } - - gf_msg_debug (this->name, 0, - "%s: Found null hashed subvol. Calling lookup" - " on all nodes.", loc->path); - - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_lookup_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } - return 0; + STACK_WIND_COOKIE(frame, dht_revalidate_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, loc, + local->xattr_req); } - - /* if we have the hashed_subvol, send the lookup there first so - * as to see whether we have a file or a directory */ - gf_msg_debug (this->name, 0, "Calling fresh lookup for %s on" - " %s", loc->path, hashed_subvol->name); - - STACK_WIND_COOKIE (frame, dht_lookup_cbk, hashed_subvol, - hashed_subvol, hashed_subvol->fops->lookup, - loc, local->xattr_req); - return 0; -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, - NULL); return 0; -} + } + /* If not a dir, this should be 1 */ + local->call_cnt = layout->cnt; + call_cnt = local->call_cnt; -int -dht_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) -{ - xlator_t *hashed_subvol = NULL; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - int op_errno = -1; - loc_t new_loc = {0,}; + for (i = 0; i < call_cnt; i++) { + subvol = layout->list[i].xlator; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); + gf_msg_debug(this->name, 0, + "path = %s. 
Calling " + "revalidate lookup on %s", + loc->path, subvol->name); - conf = this->private; - if (!conf) - goto err; - - local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - ret = dht_filter_loc_subvol_key (this, loc, &new_loc, - &hashed_subvol); - if (ret) { - loc_wipe (&local->loc); - ret = loc_dup (&new_loc, &local->loc); - - /* we no longer need 'new_loc' entries */ - loc_wipe (&new_loc); - - /* check if loc_dup() is successful */ - if (ret == -1) { - op_errno = errno; - gf_msg_debug (this->name, errno, - "copying location failed for path=%s", - loc->path); - goto err; - } - } - - if (xattr_req) { - local->xattr_req = dict_ref (xattr_req); - } else { - local->xattr_req = dict_new (); - } - - /* Nameless lookup */ - - if (gf_uuid_is_null (loc->pargfid) && !gf_uuid_is_null (loc->gfid) && - !__is_root_gfid (loc->inode->gfid)) { - local->cached_subvol = NULL; - dht_do_discover (frame, this, loc); - return 0; - } - - if (loc_is_root (loc)) { - ret = dict_set_uint32 (local->xattr_req, - conf->commithash_xattr_name, - sizeof(uint32_t)); - } - - if (!hashed_subvol) - hashed_subvol = dht_subvol_get_hashed (this, loc); - local->hashed_subvol = hashed_subvol; - - - /* The entry has been looked up before and has an inode_ctx set - */ - if (is_revalidate (loc)) { - dht_do_revalidate (frame, this, loc); - return 0; - } else { - dht_do_fresh_lookup (frame, this, loc); - return 0; - } - - return 0; + STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol, + subvol->fops->lookup, &local->loc, local->xattr_req); + } + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, - NULL); - return 0; -} - -int -dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if ((op_ret == -1) && !((op_errno == ENOENT) || - (op_errno == ENOTCONN))) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "Unlink link: subvolume %s" - " returned -1", - prev->name); - goto unlock; - } + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; +} + +int +dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + int ret = -1; + dht_conf_t *conf = NULL; + xlator_t *hashed_subvol = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int call_cnt = 0; + int i = 0; + + conf = this->private; + if (!conf) { + op_errno = EINVAL; + goto err; + } + + local = frame->local; + if (!local) { + op_errno = EINVAL; + goto err; + } + + /* Since we don't know whether this is a file or a directory, + * request all xattrs*/ + ret = dht_set_file_xattr_req(this, loc, local->xattr_req); + if (ret) { + op_errno = -ret; + goto err; + } + + ret = dht_set_dir_xattr_req(this, loc, local->xattr_req); + if (ret) { + op_errno = -ret; + goto err; + } + + /* This should have been set in dht_lookup */ + hashed_subvol = local->hashed_subvol; + + if (!hashed_subvol) { + gf_msg_debug(this->name, 0, + "%s: no subvolume in layout for path, " + "checking on all the subvols to see if " + "it is a directory", + loc->path); - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); - DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - - return 0; -} - -int -dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - xlator_t *prev = NULL; - xlator_t *hashed_subvol = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno != ENOENT) { - local->op_ret = -1; - local->op_errno = op_errno; - } else { - local->op_ret = 0; - } - gf_msg_debug (this->name, op_errno, - "Unlink: subvolume %s returned -1", - prev->name); - goto unlock; - } - - local->op_ret = 0; - - local->postparent = *postparent; - local->preparent = *preparent; - - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->preparent, 0); - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } - } -unlock: - UNLOCK (&frame->lock); - - if (!local->op_ret) { - hashed_subvol = dht_subvol_get_hashed (this, &local->loc); - if (hashed_subvol && hashed_subvol != local->cached_subvol) { - /* - * If hashed and cached are different, then we need - * to unlink linkfile from hashed subvol if data - * file is deleted successfully - */ - STACK_WIND_COOKIE (frame, dht_unlink_linkfile_cbk, - hashed_subvol, hashed_subvol, - hashed_subvol->fops->unlink, - &local->loc, local->flags, xdata); - return 0; - } - } - - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); - DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - - return 0; -} - -static int -dht_common_setxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) -{ - DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); - return 0; -} - - - -static int -dht_fix_layout_setxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) -{ - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - - if (op_ret == 0) { - - /* update the layout in the inode ctx */ - local = frame->local; - layout = local->selfheal.layout; - - dht_layout_set 
(this, local->loc.inode, layout); - } - - DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); - return 0; -} - - -int -dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto unlock; - } - - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - if ((local->fop == GF_FOP_SETXATTR) || - (local->fop == GF_FOP_FSETXATTR)) { - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, NULL); - } - if ((local->fop == GF_FOP_REMOVEXATTR) || - (local->fop == GF_FOP_FREMOVEXATTR)) { - DHT_STACK_UNWIND (removexattr, frame, local->op_ret, - local->op_errno, NULL); - } - } - - return 0; -} - -/* Set the value[] of key into dict after convert from - host byte order to network byte order -*/ -int32_t dht_dict_set_array (dict_t *dict, char *key, int32_t value[], - int32_t size) -{ - int ret = -1; - int32_t *ptr = NULL; - int32_t vindex; - - if (value == NULL) { - return -EINVAL; - } - - ptr = GF_MALLOC(sizeof(int32_t) * size, gf_common_mt_char); - if (ptr == NULL) { - return -ENOMEM; - } - for (vindex = 0; vindex < size; vindex++) { - ptr[vindex] = hton32(value[vindex]); - } - ret = dict_set_bin(dict, key, ptr, sizeof(int32_t) * size); - if (ret) - GF_FREE (ptr); - return ret; -} - -int -dht_common_mds_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - dht_local_t *local = NULL; - call_frame_t *prev = cookie; - - local = frame->local; - - if (op_ret) - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->this->name); - - if (local->fop == GF_FOP_SETXATTR) { - DHT_STACK_UNWIND (setxattr, frame, 0, op_errno, local->xdata); - } - - if (local->fop == GF_FOP_FSETXATTR) { - DHT_STACK_UNWIND (fsetxattr, frame, 0, op_errno, local->xdata); - } - - if (local->fop == GF_FOP_REMOVEXATTR) { - DHT_STACK_UNWIND (removexattr, frame, 0, op_errno, NULL); - } - - if (local->fop == GF_FOP_FREMOVEXATTR) { - DHT_STACK_UNWIND (fremovexattr, frame, 0, op_errno, NULL); - } - - return 0; -} - -/* Code to wind a xattrop call to add 1 on current mds internal xattr - value -*/ -int -dht_setxattr_non_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - int ret = 0; - dict_t *xattrop = NULL; - int32_t addone[1] = {1}; - call_frame_t *prev = NULL; - dht_conf_t *conf = NULL; - - local = frame->local; - prev = cookie; - conf = this->private; - - LOCK (&frame->lock); - { - if (op_ret && !local->op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->this->name); - } - } - UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); - - if (is_last_call (this_call_cnt)) { - if (!local->op_ret) { - xattrop = dict_new (); - if (!xattrop) { - gf_msg (this->name, GF_LOG_ERROR, - DHT_MSG_NO_MEMORY, 0, - "dictionary creation failed"); - ret = -1; - goto out; - } - ret = dht_dict_set_array (xattrop, - conf->mds_xattr_key, - addone, 1); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - 
DHT_MSG_DICT_SET_FAILED, - "dictionary set array failed "); - ret = -1; - goto out; - } - if ((local->fop == GF_FOP_SETXATTR) || - (local->fop == GF_FOP_REMOVEXATTR)) { - STACK_WIND (frame, dht_common_mds_xattrop_cbk, - local->mds_subvol, - local->mds_subvol->fops->xattrop, - &local->loc, GF_XATTROP_ADD_ARRAY, - xattrop, NULL); - } else { - STACK_WIND (frame, dht_common_mds_xattrop_cbk, - local->mds_subvol, - local->mds_subvol->fops->fxattrop, - local->fd, GF_XATTROP_ADD_ARRAY, - xattrop, NULL); - } - } else { - if (local->fop == GF_FOP_SETXATTR) { - DHT_STACK_UNWIND (setxattr, frame, 0, 0, local->xdata); - } - - if (local->fop == GF_FOP_FSETXATTR) { - DHT_STACK_UNWIND (fsetxattr, frame, 0, 0, local->xdata); - } - - if (local->fop == GF_FOP_REMOVEXATTR) { - DHT_STACK_UNWIND (removexattr, frame, 0, 0, NULL); - } - - if (local->fop == GF_FOP_FREMOVEXATTR) { - DHT_STACK_UNWIND (fremovexattr, frame, 0, 0, NULL); - } - } - } -out: - if (xattrop) - dict_unref (xattrop); - if (ret) { - if (local->fop == GF_FOP_SETXATTR) { - DHT_STACK_UNWIND (setxattr, frame, 0, 0, local->xdata); - } - - if (local->fop == GF_FOP_FSETXATTR) { - DHT_STACK_UNWIND (fsetxattr, frame, 0, 0, local->xdata); - } - - if (local->fop == GF_FOP_REMOVEXATTR) { - DHT_STACK_UNWIND (removexattr, frame, 0, 0, NULL); - } - - if (local->fop == GF_FOP_FREMOVEXATTR) { - DHT_STACK_UNWIND (fremovexattr, frame, 0, 0, NULL); - } - } - return 0; -} - - -int -dht_setxattr_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - call_frame_t *prev = NULL; - xlator_t *mds_subvol = NULL; - int i = 0; - - local = frame->local; - prev = cookie; - conf = this->private; - mds_subvol = local->mds_subvol; - - if (op_ret == -1) { - local->op_ret = op_ret; - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->this->name); - goto out; - } - - local->op_ret = 0; - local->call_cnt = conf->subvolume_cnt - 1; - local->xdata = dict_ref (xdata); - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (mds_subvol && (mds_subvol == conf->subvolumes[i])) - continue; - if (local->fop == GF_FOP_SETXATTR) { - STACK_WIND (frame, dht_setxattr_non_mds_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->setxattr, - &local->loc, local->xattr, - local->flags, local->xattr_req); - } - - if (local->fop == GF_FOP_FSETXATTR) { - STACK_WIND (frame, dht_setxattr_non_mds_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->fsetxattr, - local->fd, local->xattr, - local->flags, local->xattr_req); - } - - if (local->fop == GF_FOP_REMOVEXATTR) { - STACK_WIND (frame, dht_setxattr_non_mds_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->removexattr, - &local->loc, local->key, - local->xattr_req); - } - - if (local->fop == GF_FOP_FREMOVEXATTR) { - STACK_WIND (frame, dht_setxattr_non_mds_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->fremovexattr, - local->fd, local->key, - local->xattr_req); - } - } - - return 0; -out: - if (local->fop == GF_FOP_SETXATTR) { - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, xdata); - } - - if (local->fop == GF_FOP_FSETXATTR) { - DHT_STACK_UNWIND (fsetxattr, frame, local->op_ret, - local->op_errno, xdata); - } - - if (local->fop == GF_FOP_REMOVEXATTR) { - DHT_STACK_UNWIND (removexattr, frame, local->op_ret, - local->op_errno, NULL); - } - - if (local->fop == GF_FOP_FREMOVEXATTR) { - DHT_STACK_UNWIND (fremovexattr, frame, local->op_ret, - 
local->op_errno, NULL); - } - - return 0; -} - -int -dht_xattrop_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *dict, dict_t *xdata) -{ - dht_local_t *local = NULL; - call_frame_t *prev = NULL; - - local = frame->local; - prev = cookie; - - if (op_ret == -1) { - local->op_errno = op_errno; - local->op_ret = op_ret; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->this->name); - goto out; - } - - if (local->fop == GF_FOP_SETXATTR) { - STACK_WIND (frame, dht_setxattr_mds_cbk, - local->mds_subvol, - local->mds_subvol->fops->setxattr, - &local->loc, local->xattr, - local->flags, local->xattr_req); - } - - if (local->fop == GF_FOP_FSETXATTR) { - STACK_WIND (frame, dht_setxattr_mds_cbk, - local->mds_subvol, - local->mds_subvol->fops->fsetxattr, - local->fd, local->xattr, - local->flags, local->xattr_req); - } - - if (local->fop == GF_FOP_REMOVEXATTR) { - STACK_WIND (frame, dht_setxattr_mds_cbk, - local->mds_subvol, - local->mds_subvol->fops->removexattr, - &local->loc, local->key, - local->xattr_req); - } - - if (local->fop == GF_FOP_FREMOVEXATTR) { - STACK_WIND (frame, dht_setxattr_mds_cbk, - local->mds_subvol, - local->mds_subvol->fops->fremovexattr, - local->fd, local->key, - local->xattr_req); - } - - - return 0; -out: - if (local->fop == GF_FOP_SETXATTR) { - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, xdata); - } - - if (local->fop == GF_FOP_FSETXATTR) { - DHT_STACK_UNWIND (fsetxattr, frame, local->op_ret, - local->op_errno, xdata); - } - - if (local->fop == GF_FOP_REMOVEXATTR) { - DHT_STACK_UNWIND (removexattr, frame, local->op_ret, - local->op_errno, NULL); - } - - if (local->fop == GF_FOP_FREMOVEXATTR) { - DHT_STACK_UNWIND (fremovexattr, frame, local->op_ret, - local->op_errno, NULL); - } - - return 0; -} - -static void -fill_layout_info (dht_layout_t *layout, char *buf) -{ - int i = 0; - char tmp_buf[128] = {0,}; - - for (i = 0; i < layout->cnt; i++) { - snprintf (tmp_buf, sizeof (tmp_buf), "(%s %u %u)", - layout->list[i].xlator->name, - layout->list[i].start, - layout->list[i].stop); - if (i) - strcat (buf, " "); - strcat (buf, tmp_buf); - } -} - -static void -dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local, - char *xattr_buf, int32_t alloc_len, - int flag, char *layout_buf) -{ - if (flag) { - if (local->xattr_val) { - snprintf (xattr_buf, alloc_len, - "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))", - this->name, local->xattr_val, this->name, - layout_buf); - } else { - snprintf (xattr_buf, alloc_len, "(%s-layout %s)", - this->name, layout_buf); - } - } else if (local->xattr_val) { - snprintf (xattr_buf, alloc_len, - "(<"DHT_PATHINFO_HEADER"%s> %s)", - this->name, local->xattr_val); - } else { - xattr_buf[0] = '\0'; - } -} - -int -dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this, - int op_errno) -{ - int ret = -1; - char *value = NULL; - - ret = dict_get_str (xattr, local->xsel, &value); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_GET_XATTR_FAILED, - "Subvolume %s returned -1", this->name); - local->op_ret = -1; - local->op_errno = op_errno; - goto out; - } - - local->alloc_len += strlen(value); - - if (!local->xattr_val) { - local->alloc_len += (SLEN (DHT_PATHINFO_HEADER) + 10); - local->xattr_val = GF_MALLOC (local->alloc_len, - gf_common_mt_char); - if (!local->xattr_val) { - ret = -1; - goto out; - } - local->xattr_val[0] = '\0'; - } - - int plen = strlen (local->xattr_val); - if (plen) { - /* extra byte(s) for 
\0 to be safe */ - local->alloc_len += (plen + 2); - local->xattr_val = GF_REALLOC (local->xattr_val, - local->alloc_len); - if (!local->xattr_val) { - ret = -1; - goto out; - } - } - - (void) strcat (local->xattr_val, value); - (void) strcat (local->xattr_val, " "); - local->op_ret = 0; - - ret = 0; - - out: - return ret; -} - -int -dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this, - gf_boolean_t flag) -{ - int ret = -1; - char *xattr_buf = NULL; - char layout_buf[8192] = {0,}; - - if (flag) - fill_layout_info (local->layout, layout_buf); - - *dict = dict_new (); - if (!*dict) - goto out; + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - local->xattr_val[strlen (local->xattr_val) - 1] = '\0'; + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } - /* we would need max this many bytes to create xattr string - * extra 40 bytes is just an estimated amount of additional - * space required as we include translator name and some - * spaces, brackets etc. when forming the pathinfo string. - * - * For node-uuid we just don't have all the pretty formatting, - * but since this is a generic routine for pathinfo & node-uuid - * we don't have conditional space allocation and try to be - * generic - */ - local->alloc_len += (2 * strlen (this->name)) - + strlen (layout_buf) - + 40; - xattr_buf = GF_MALLOC (local->alloc_len, gf_common_mt_char); - if (!xattr_buf) - goto out; + gf_msg_debug(this->name, 0, + "%s: Found null hashed subvol. Calling lookup" + " on all nodes.", + loc->path); - if (XATTR_IS_PATHINFO (local->xsel)) { - (void) dht_fill_pathinfo_xattr (this, local, xattr_buf, - local->alloc_len, flag, - layout_buf); - } else if ((XATTR_IS_NODE_UUID (local->xsel)) - || (XATTR_IS_NODE_UUID_LIST (local->xsel))) { - (void) snprintf (xattr_buf, local->alloc_len, "%s", - local->xattr_val); - } else { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_GET_XATTR_FAILED, - "Unknown local->xsel (%s)", local->xsel); - GF_FREE (xattr_buf); - goto out; + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + local->xattr_req); } + return 0; + } - ret = dict_set_dynstr (*dict, local->xsel, xattr_buf); - if (ret) - GF_FREE (xattr_buf); - GF_FREE (local->xattr_val); + /* if we have the hashed_subvol, send the lookup there first so + * as to see whether we have a file or a directory */ + gf_msg_debug(this->name, 0, + "Calling fresh lookup for %s on" + " %s", + loc->path, hashed_subvol->name); - out: - return ret; + STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol, + hashed_subvol->fops->lookup, loc, local->xattr_req); + return 0; +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; } - int -dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, - dict_t *xdata) +dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *prev = NULL; - int this_call_cnt = 0; - int ret = 0; - char *uuid_str = NULL; - char *uuid_list = NULL; - char *next_uuid_str = NULL; - char *saveptr = NULL; - uuid_t node_uuid = {0,}; - char *uuid_list_copy = NULL; - int count = 0; - int i = 0; - int index = 0; - int found = 0; - nodeuuid_info_t *tmp_ptr = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (frame->local, out); - - local = frame->local; - prev = cookie; - conf = this->private; - - VALIDATE_OR_GOTO (conf->defrag, out); - - gf_msg_debug (this->name, 0, "subvol %s returned", prev->name); - - LOCK (&frame->lock); - { - this_call_cnt = --local->call_cnt; - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_GET_XATTR_FAILED, - "getxattr err for dir"); - local->op_ret = -1; - local->op_errno = op_errno; - goto unlock; - } - - ret = dict_get_str (xattr, local->xsel, &uuid_list); - - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_GET_FAILED, - "Failed to get %s", local->xsel); - local->op_ret = -1; - local->op_errno = EINVAL; - goto unlock; - } - - /* As DHT will not know details of its child xlators - * we need to parse this twice to get the count first - * and allocate memory later. - */ - count = 0; - index = conf->local_subvols_cnt; - - uuid_list_copy = gf_strdup (uuid_list); - - for (uuid_str = strtok_r (uuid_list, " ", &saveptr); - uuid_str; - uuid_str = next_uuid_str) { - - next_uuid_str = strtok_r (NULL, " ", &saveptr); - if (gf_uuid_parse (uuid_str, node_uuid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_UUID_PARSE_ERROR, - "Failed to parse uuid" - " for %s", prev->name); - local->op_ret = -1; - local->op_errno = EINVAL; - goto unlock; - } - - count++; - if (gf_uuid_compare (node_uuid, conf->defrag->node_uuid)) { - gf_msg_debug (this->name, 0, "subvol %s does not" - "belong to this node", - prev->name); - } else { - - /* handle multiple bricks of the same replica - * on the same node */ - if (found) - continue; - conf->local_subvols[(conf->local_subvols_cnt)++] - = prev; - found = 1; - gf_msg_debug (this->name, 0, "subvol %s belongs to" - " this node", prev->name); - } - } - - if (!found) { - local->op_ret = 0; - goto unlock; - } + xlator_t *hashed_subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + loc_t new_loc = { + 0, + }; - conf->local_nodeuuids[index].count = count; - conf->local_nodeuuids[index].elements - = GF_CALLOC (count, sizeof (nodeuuid_info_t), 1); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); - /* The node-uuids are guaranteed to be returned in the same - * order as the bricks - * A null node-uuid is returned for a brick that is down. 
- */ + conf = this->private; + if (!conf) + goto err; - saveptr = NULL; - i = 0; + local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP); + if (!local) { + op_errno = ENOMEM; + goto err; + } - for (uuid_str = strtok_r (uuid_list_copy, " ", &saveptr); - uuid_str; - uuid_str = next_uuid_str) { + ret = dht_filter_loc_subvol_key(this, loc, &new_loc, &hashed_subvol); + if (ret) { + loc_wipe(&local->loc); + ret = loc_dup(&new_loc, &local->loc); - next_uuid_str = strtok_r (NULL, " ", &saveptr); - tmp_ptr = &(conf->local_nodeuuids[index].elements[i]); - gf_uuid_parse (uuid_str, tmp_ptr->uuid); + /* we no longer need 'new_loc' entries */ + loc_wipe(&new_loc); - if (!gf_uuid_compare (tmp_ptr->uuid, - conf->defrag->node_uuid)) { - tmp_ptr->info = REBAL_NODEUUID_MINE; - } - i++; - tmp_ptr = NULL; - } + /* check if loc_dup() is successful */ + if (ret == -1) { + op_errno = errno; + gf_msg_debug(this->name, errno, + "copying location failed for path=%s", loc->path); + goto err; } + } - local->op_ret = 0; - unlock: - UNLOCK (&frame->lock); - - if (!is_last_call (this_call_cnt)) - goto out; + if (xattr_req) { + local->xattr_req = dict_ref(xattr_req); + } else { + local->xattr_req = dict_new(); + } - if (local->op_ret == -1) { - goto unwind; - } + /* Nameless lookup */ - DHT_STACK_UNWIND (getxattr, frame, 0, 0, xattr, xdata); - goto out; + if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) && + !__is_root_gfid(loc->inode->gfid)) { + local->cached_subvol = NULL; + dht_do_discover(frame, this, loc); + return 0; + } - unwind: + if (loc_is_root(loc)) { + ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name, + sizeof(uint32_t)); + } - GF_FREE (conf->local_nodeuuids[index].elements); - conf->local_nodeuuids[index].elements = NULL; + if (!hashed_subvol) + hashed_subvol = dht_subvol_get_hashed(this, loc); + local->hashed_subvol = hashed_subvol; - DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, xdata); - out: - GF_FREE (uuid_list_copy); + /* The entry has been looked up before and has an inode_ctx set + */ + if (is_revalidate(loc)) { + dht_do_revalidate(frame, this, loc); + return 0; + } else { + dht_do_fresh_lookup(frame, this, loc); return 0; + } + + return 0; +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; } int -dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int ret = 0; - dht_local_t *local = NULL; - int this_call_cnt = 0; - dict_t *dict = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (frame->local, out); - - local = frame->local; - - LOCK (&frame->lock); - { - this_call_cnt = --local->call_cnt; - if (op_ret < 0) { - if (op_errno != ENOTCONN) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_GET_XATTR_FAILED, - "getxattr err for dir"); - local->op_ret = -1; - local->op_errno = op_errno; - } - - goto unlock; - } - - ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, - op_errno); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_DICT_SET_FAILED, - "alloc or fill failure"); - } - unlock: - UNLOCK (&frame->lock); + dht_local_t *local = NULL; + xlator_t *prev = NULL; - if (!is_last_call (this_call_cnt)) - goto out; - - /* -- last call: do patch ups -- */ + local = frame->local; + prev = cookie; - if (local->op_ret == -1) { - goto unwind; + LOCK(&frame->lock); + { + if ((op_ret == -1) && + !((op_errno == ENOENT) || (op_errno == ENOTCONN))) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, + "Unlink link: subvolume %s" + " returned -1", + prev->name); + goto unlock; } - ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true); - if (ret) - goto unwind; + local->op_ret = 0; + } +unlock: + UNLOCK(&frame->lock); - DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); - goto cleanup; + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); + DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); - unwind: - DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL); - cleanup: - if (dict) - dict_unref (dict); - out: - return 0; + return 0; } int -dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - int ret = 0; - dict_t *dict = NULL; - xlator_t *prev = NULL; - gf_boolean_t flag = _gf_true; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *hashed_subvol = NULL; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; - if (op_ret < 0) { + LOCK(&frame->lock); + { + if (op_ret == -1) { + if (op_errno != ENOENT) { local->op_ret = -1; local->op_errno = op_errno; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_GET_XATTR_FAILED, - "vgetxattr: Subvolume %s returned -1", - prev->name); - goto unwind; + } else { + local->op_ret = 0; + } + gf_msg_debug(this->name, op_errno, + "Unlink: subvolume %s returned -1", prev->name); + goto unlock; } - ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, - op_errno); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_NO_MEMORY, - "Allocation or fill failure"); - goto unwind; - } + local->op_ret = 0; - flag = (local->layout->cnt > 1) ? 
_gf_true : _gf_false; + local->postparent = *postparent; + local->preparent = *preparent; - ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag); - if (ret) - goto unwind; + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->preparent, 0); + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); + } + } +unlock: + UNLOCK(&frame->lock); - DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); - goto cleanup; + if (!local->op_ret) { + hashed_subvol = dht_subvol_get_hashed(this, &local->loc); + if (hashed_subvol && hashed_subvol != local->cached_subvol) { + /* + * If hashed and cached are different, then we need + * to unlink linkfile from hashed subvol if data + * file is deleted successfully + */ + STACK_WIND_COOKIE(frame, dht_unlink_linkfile_cbk, hashed_subvol, + hashed_subvol, hashed_subvol->fops->unlink, + &local->loc, local->flags, xdata); + return 0; + } + } - unwind: - DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, - NULL, NULL); - cleanup: - if (dict) - dict_unref (dict); + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); + DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); - return 0; + return 0; } -int -dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, - dict_t *xdata) +static int +dht_common_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int ret = 0; - char *value = NULL; + DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + return 0; +} - if (op_ret != -1) { - ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value); - if (!ret) { - ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value); - if (!ret) - gf_msg_trace (this->name, 0, - "failed to set linkinfo"); - } - } +static int +dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + + if (op_ret == 0) { + /* update the layout in the inode ctx */ + local = frame->local; + layout = local->selfheal.layout; - DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata); + dht_layout_set(this, local->loc.inode, layout); + } - return 0; + DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + return 0; } - int -dht_mds_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (frame->local, out); - VALIDATE_OR_GOTO (this->private, out); + local = frame->local; + prev = cookie; - conf = this->private; - local = frame->local; - - if (!xattr || (op_ret == -1)) { - local->op_ret = op_ret; - goto out; - } - if (dict_get (xattr, conf->xattr_name)) { - dict_del (xattr, conf->xattr_name); + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto unlock; } + local->op_ret = 0; + } +unlock: + UNLOCK(&frame->lock); - if (!local->xattr) { - local->xattr = dict_copy_with_ref (xattr, NULL); + this_call_cnt = 
dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + if ((local->fop == GF_FOP_SETXATTR) || + (local->fop == GF_FOP_FSETXATTR)) { + DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + NULL); + } + if ((local->fop == GF_FOP_REMOVEXATTR) || + (local->fop == GF_FOP_FREMOVEXATTR)) { + DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + NULL); } + } -out: - DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, - local->xattr, xdata); - return 0; + return 0; } +/* Set the value[] of key into dict after convert from + host byte order to network byte order +*/ +int32_t +dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size) +{ + int ret = -1; + int32_t *ptr = NULL; + int32_t vindex; + + if (value == NULL) { + return -EINVAL; + } + + ptr = GF_MALLOC(sizeof(int32_t) * size, gf_common_mt_char); + if (ptr == NULL) { + return -ENOMEM; + } + for (vindex = 0; vindex < size; vindex++) { + ptr[vindex] = hton32(value[vindex]); + } + ret = dict_set_bin(dict, key, ptr, sizeof(int32_t) * size); + if (ret) + GF_FREE(ptr); + return ret; +} int -dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - int this_call_cnt = 0; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (frame->local, out); - VALIDATE_OR_GOTO (this->private, out); + dht_local_t *local = NULL; + call_frame_t *prev = cookie; - conf = this->private; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (!xattr || (op_ret == -1)) { - local->op_ret = op_ret; - goto unlock; - } + if (op_ret) + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->this->name); - if (dict_get (xattr, conf->xattr_name)) { - dict_del (xattr, conf->xattr_name); - } + if (local->fop == GF_FOP_SETXATTR) { + DHT_STACK_UNWIND(setxattr, frame, 0, op_errno, local->xdata); + } - if (dict_get (xattr, conf->mds_xattr_key)) { - dict_del (xattr, conf->mds_xattr_key); - } + if (local->fop == GF_FOP_FSETXATTR) { + DHT_STACK_UNWIND(fsetxattr, frame, 0, op_errno, local->xdata); + } - /* filter out following two xattrs that need not - * be visible on the mount point for geo-rep - - * trusted.tier.fix.layout.complete and - * trusted.tier.tier-dht.commithash - */ + if (local->fop == GF_FOP_REMOVEXATTR) { + DHT_STACK_UNWIND(removexattr, frame, 0, op_errno, NULL); + } - if (dict_get (xattr, conf->commithash_xattr_name)) { - dict_del (xattr, conf->commithash_xattr_name); - } + if (local->fop == GF_FOP_FREMOVEXATTR) { + DHT_STACK_UNWIND(fremovexattr, frame, 0, op_errno, NULL); + } - if (frame->root->pid >= 0 && dht_is_tier_xlator (this)) { - dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); - } + return 0; +} - if (frame->root->pid >= 0) { - GF_REMOVE_INTERNAL_XATTR - ("trusted.glusterfs.quota*", xattr); - GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); - } +/* Code to wind a xattrop call to add 1 on current mds internal xattr + value +*/ +int +dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + int ret = 0; + dict_t *xattrop = NULL; + int32_t addone[1] = {1}; + call_frame_t *prev = NULL; + dht_conf_t *conf = NULL; + + local = frame->local; + prev = cookie; + conf = 
this->private; + + LOCK(&frame->lock); + { + if (op_ret && !local->op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->this->name); + } + } + UNLOCK(&frame->lock); + this_call_cnt = dht_frame_return(frame); + + if (is_last_call(this_call_cnt)) { + if (!local->op_ret) { + xattrop = dict_new(); + if (!xattrop) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, + "dictionary creation failed"); + ret = -1; + goto out; + } + ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, addone, 1); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "dictionary set array failed "); + ret = -1; + goto out; + } + if ((local->fop == GF_FOP_SETXATTR) || + (local->fop == GF_FOP_REMOVEXATTR)) { + STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol, + local->mds_subvol->fops->xattrop, &local->loc, + GF_XATTROP_ADD_ARRAY, xattrop, NULL); + } else { + STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol, + local->mds_subvol->fops->fxattrop, local->fd, + GF_XATTROP_ADD_ARRAY, xattrop, NULL); + } + } else { + if (local->fop == GF_FOP_SETXATTR) { + DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata); + } - local->op_ret = 0; + if (local->fop == GF_FOP_FSETXATTR) { + DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata); + } - if (!local->xattr) { - local->xattr = dict_copy_with_ref (xattr, NULL); - } else { - dht_aggregate_xattr (local->xattr, xattr); - } + if (local->fop == GF_FOP_REMOVEXATTR) { + DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL); + } + if (local->fop == GF_FOP_FREMOVEXATTR) { + DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL); + } } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); + } out: - if (is_last_call (this_call_cnt)) { + if (xattrop) + dict_unref(xattrop); + if (ret) { + if (local->fop == GF_FOP_SETXATTR) { + DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata); + } - /* If we have a valid xattr received from any one of the - * subvolume, let's return it */ - if (local->xattr) { - local->op_ret = 0; - } + if (local->fop == GF_FOP_FSETXATTR) { + DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata); + } - DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, - local->xattr, NULL); + if (local->fop == GF_FOP_REMOVEXATTR) { + DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL); } - return 0; -} -int32_t -dht_getxattr_unwind (call_frame_t *frame, - int op_ret, int op_errno, dict_t *dict, dict_t *xdata) -{ - DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + if (local->fop == GF_FOP_FREMOVEXATTR) { + DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL); + } + } + return 0; } - int -dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - dict_t *xattr, dict_t *xdata) +dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - int this_call_cnt = 0; - dht_local_t *local = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + call_frame_t *prev = NULL; + xlator_t *mds_subvol = NULL; + int i = 0; + local = frame->local; + prev = cookie; + conf = this->private; + mds_subvol = local->mds_subvol; - local = frame->local; + if (op_ret == -1) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->this->name); + goto out; + } - LOCK (&frame->lock); - { - if (local->op_errno == ENODATA || - 
local->op_errno == EOPNOTSUPP) { - /* Nothing to do here, we have already found - * a subvol which does not have the get_real_filename - * optimization. If condition is for simple logic. - */ - goto unlock; - } + local->op_ret = 0; + local->call_cnt = conf->subvolume_cnt - 1; + local->xdata = dict_ref(xdata); - if (op_ret == -1) { - - if (op_errno == ENODATA || op_errno == EOPNOTSUPP) { - /* This subvol does not have the optimization. - * Better let the user know we don't support it. - * Remove previous results if any. - */ - - if (local->xattr) { - dict_unref (local->xattr); - local->xattr = NULL; - } - - if (local->xattr_req) { - dict_unref (local->xattr_req); - local->xattr_req = NULL; - } - - local->op_ret = op_ret; - local->op_errno = op_errno; - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_UPGRADE_BRICKS, "At least " - "one of the bricks does not support " - "this operation. Please upgrade all " - "bricks."); - goto unlock; - } - - if (op_errno == ENOENT) { - /* Do nothing, our defaults are set to this. - */ - goto unlock; - } - - /* This is a place holder for every other error - * case. I am not sure of how to interpret - * ENOTCONN etc. As of now, choosing to ignore - * down subvol and return a good result(if any) - * from other subvol. - */ - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_GET_XATTR_FAILED, - "Failed to get real filename."); - goto unlock; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (mds_subvol && (mds_subvol == conf->subvolumes[i])) + continue; + if (local->fop == GF_FOP_SETXATTR) { + STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->setxattr, &local->loc, + local->xattr, local->flags, local->xattr_req); + } - } + if (local->fop == GF_FOP_FSETXATTR) { + STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->fsetxattr, local->fd, + local->xattr, local->flags, local->xattr_req); + } + if (local->fop == GF_FOP_REMOVEXATTR) { + STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->removexattr, &local->loc, + local->key, local->xattr_req); + } - /* This subvol has the required file. - * There could be other subvols which have returned - * success already, choosing to return the latest good - * result. 
- */ - if (local->xattr) - dict_unref (local->xattr); - local->xattr = dict_ref (xattr); + if (local->fop == GF_FOP_FREMOVEXATTR) { + STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->fremovexattr, local->fd, + local->key, local->xattr_req); + } + } - if (local->xattr_req) { - dict_unref (local->xattr_req); - local->xattr_req = NULL; - } - if (xdata) - local->xattr_req = dict_ref (xdata); + return 0; +out: + if (local->fop == GF_FOP_SETXATTR) { + DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); + } - local->op_ret = op_ret; - local->op_errno = 0; - gf_msg_debug (this->name, 0, "Found a matching " - "file."); - } -unlock: - UNLOCK (&frame->lock); + if (local->fop == GF_FOP_FSETXATTR) { + DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); + } + if (local->fop == GF_FOP_REMOVEXATTR) { + DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + NULL); + } - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (getxattr, frame, local->op_ret, - local->op_errno, local->xattr, - local->xattr_req); - } + if (local->fop == GF_FOP_FREMOVEXATTR) { + DHT_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, + NULL); + } - return 0; + return 0; } - int -dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *key, dict_t *xdata) +dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) { - dht_local_t *local = NULL; - int i = 0; - dht_layout_t *layout = NULL; - int cnt = 0; - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + local = frame->local; + prev = cookie; - local = frame->local; - layout = local->layout; + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = op_ret; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->this->name); + goto out; + } + + if (local->fop == GF_FOP_SETXATTR) { + STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol, + local->mds_subvol->fops->setxattr, &local->loc, local->xattr, + local->flags, local->xattr_req); + } + + if (local->fop == GF_FOP_FSETXATTR) { + STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol, + local->mds_subvol->fops->fsetxattr, local->fd, local->xattr, + local->flags, local->xattr_req); + } + + if (local->fop == GF_FOP_REMOVEXATTR) { + STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol, + local->mds_subvol->fops->removexattr, &local->loc, + local->key, local->xattr_req); + } + + if (local->fop == GF_FOP_FREMOVEXATTR) { + STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol, + local->mds_subvol->fops->fremovexattr, local->fd, local->key, + local->xattr_req); + } + + return 0; +out: + if (local->fop == GF_FOP_SETXATTR) { + DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); + } - cnt = local->call_cnt = layout->cnt; + if (local->fop == GF_FOP_FSETXATTR) { + DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); + } - local->op_ret = -1; - local->op_errno = ENOENT; + if (local->fop == GF_FOP_REMOVEXATTR) { + DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + NULL); + } - for (i = 0; i < cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND (frame, dht_getxattr_get_real_filename_cbk, - subvol, subvol->fops->getxattr, - loc, key, xdata); - } + if (local->fop == GF_FOP_FREMOVEXATTR) { + DHT_STACK_UNWIND(fremovexattr, 
frame, local->op_ret, local->op_errno, + NULL); + } - return 0; + return 0; } -int -dht_marker_populate_args (call_frame_t *frame, int type, int *gauge, - xlator_t **subvols) +static void +fill_layout_info(dht_layout_t *layout, char *buf) { - dht_local_t *local = NULL; - int i = 0; - dht_layout_t *layout = NULL; - - local = frame->local; - layout = local->layout; - - for (i = 0; i < layout->cnt; i++) - subvols[i] = layout->list[i].xlator; + int i = 0; + char tmp_buf[128] = { + 0, + }; - return layout->cnt; + for (i = 0; i < layout->cnt; i++) { + snprintf(tmp_buf, sizeof(tmp_buf), "(%s %u %u)", + layout->list[i].xlator->name, layout->list[i].start, + layout->list[i].stop); + if (i) + strcat(buf, " "); + strcat(buf, tmp_buf); + } } - -int -dht_is_debug_xattr_key (char **array, char *key) -{ - int i = 0; - - for (i = 0; array[i]; i++) { - if (fnmatch (array[i], key, FNM_NOESCAPE) == 0) - return i; +static void +dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf, + int32_t alloc_len, int flag, char *layout_buf) +{ + if (flag) { + if (local->xattr_val) { + snprintf(xattr_buf, alloc_len, + "((<" DHT_PATHINFO_HEADER "%s> %s) (%s-layout %s))", + this->name, local->xattr_val, this->name, layout_buf); + } else { + snprintf(xattr_buf, alloc_len, "(%s-layout %s)", this->name, + layout_buf); } - - return -1; + } else if (local->xattr_val) { + snprintf(xattr_buf, alloc_len, "(<" DHT_PATHINFO_HEADER "%s> %s)", + this->name, local->xattr_val); + } else { + xattr_buf[0] = '\0'; + } } - -/* Note we already have frame->local initialised here*/ - int -dht_handle_debug_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *key) +dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this, + int op_errno) { - dht_local_t *local = NULL; - int ret = -1; - int op_errno = ENODATA; - char *value = NULL; - loc_t file_loc = {0}; - const char *name = NULL; + int ret = -1; + char *value = NULL; - local = frame->local; - if (!key) { - op_errno = EINVAL; - goto out; - } + ret = dict_get_str(xattr, local->xsel, &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, + "Subvolume %s returned -1", this->name); + local->op_ret = -1; + local->op_errno = op_errno; + goto out; + } - if (dht_is_debug_xattr_key (dht_dbg_vxattrs, (char *)key) == -1) { - goto out; + local->alloc_len += strlen(value); + + if (!local->xattr_val) { + local->alloc_len += (SLEN(DHT_PATHINFO_HEADER) + 10); + local->xattr_val = GF_MALLOC(local->alloc_len, gf_common_mt_char); + if (!local->xattr_val) { + ret = -1; + goto out; } + local->xattr_val[0] = '\0'; + } - local->xattr = dict_new (); - if (!local->xattr) { - op_errno = ENOMEM; - goto out; + int plen = strlen(local->xattr_val); + if (plen) { + /* extra byte(s) for \0 to be safe */ + local->alloc_len += (plen + 2); + local->xattr_val = GF_REALLOC(local->xattr_val, local->alloc_len); + if (!local->xattr_val) { + ret = -1; + goto out; } + } - if (strncmp (key, DHT_DBG_HASHED_SUBVOL_KEY, - SLEN (DHT_DBG_HASHED_SUBVOL_KEY)) == 0) { + (void)strcat(local->xattr_val, value); + (void)strcat(local->xattr_val, " "); + local->op_ret = 0; - name = key + strlen(DHT_DBG_HASHED_SUBVOL_KEY); - if (strlen(name) == 0) { - op_errno = EINVAL; - goto out; - } + ret = 0; - ret = dht_build_child_loc (this, &file_loc, loc, (char *)name); - if (ret) { - op_errno = ENOMEM; - goto out; - } +out: + return ret; +} - local->hashed_subvol = dht_subvol_get_hashed (this, &file_loc); - if (local->hashed_subvol == NULL) { - op_errno = 
ENODATA; - goto out; - } +int +dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this, + gf_boolean_t flag) +{ + int ret = -1; + char *xattr_buf = NULL; + char layout_buf[8192] = { + 0, + }; - value = gf_strdup (local->hashed_subvol->name); - if (!value) { - op_errno = ENOMEM; - goto out; - } + if (flag) + fill_layout_info(local->layout, layout_buf); - ret = dict_set_dynstr (local->xattr, (char *)key, value); - if (ret < 0) { - op_errno = -ret; - ret = -1; - goto out; - } - ret = 0; - goto out; - } + *dict = dict_new(); + if (!*dict) + goto out; -out: - loc_wipe (&file_loc); - DHT_STACK_UNWIND (getxattr, frame, ret, op_errno, local->xattr, NULL); - return 0; -} + local->xattr_val[strlen(local->xattr_val) - 1] = '\0'; + + /* we would need max this many bytes to create xattr string + * extra 40 bytes is just an estimated amount of additional + * space required as we include translator name and some + * spaces, brackets etc. when forming the pathinfo string. + * + * For node-uuid we just don't have all the pretty formatting, + * but since this is a generic routine for pathinfo & node-uuid + * we don't have conditional space allocation and try to be + * generic + */ + local->alloc_len += (2 * strlen(this->name)) + strlen(layout_buf) + 40; + xattr_buf = GF_MALLOC(local->alloc_len, gf_common_mt_char); + if (!xattr_buf) + goto out; + if (XATTR_IS_PATHINFO(local->xsel)) { + (void)dht_fill_pathinfo_xattr(this, local, xattr_buf, local->alloc_len, + flag, layout_buf); + } else if ((XATTR_IS_NODE_UUID(local->xsel)) || + (XATTR_IS_NODE_UUID_LIST(local->xsel))) { + (void)snprintf(xattr_buf, local->alloc_len, "%s", local->xattr_val); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GET_XATTR_FAILED, + "Unknown local->xsel (%s)", local->xsel); + GF_FREE(xattr_buf); + goto out; + } -int -dht_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *key, dict_t *xdata) -#define DHT_IS_DIR(layout) (layout->cnt > 1) -{ + ret = dict_set_dynstr(*dict, local->xsel, xattr_buf); + if (ret) + GF_FREE(xattr_buf); + GF_FREE(local->xattr_val); - xlator_t *subvol = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *mds_subvol = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int op_errno = -1; - int i = 0; - int cnt = 0; - char *node_uuid_key = NULL; - int ret = -1; +out: + return ret; +} + +int +dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *prev = NULL; + int this_call_cnt = 0; + int ret = 0; + char *uuid_str = NULL; + char *uuid_list = NULL; + char *next_uuid_str = NULL; + char *saveptr = NULL; + uuid_t node_uuid = { + 0, + }; + char *uuid_list_copy = NULL; + int count = 0; + int i = 0; + int index = 0; + int found = 0; + nodeuuid_info_t *tmp_ptr = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(frame->local, out); + + local = frame->local; + prev = cookie; + conf = this->private; + + VALIDATE_OR_GOTO(conf->defrag, out); + + gf_msg_debug(this->name, 0, "subvol %s returned", prev->name); + + LOCK(&frame->lock); + { + this_call_cnt = --local->call_cnt; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, + "getxattr err for dir"); + local->op_ret = -1; + local->op_errno = op_errno; + goto unlock; + } - GF_CHECK_XATTR_KEY_AND_GOTO (key, IO_THREADS_QUEUE_SIZE_KEY, - op_errno, err); - 
VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (this->private, err); + ret = dict_get_str(xattr, local->xsel, &uuid_list); - conf = this->private; + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_GET_FAILED, + "Failed to get %s", local->xsel); + local->op_ret = -1; + local->op_errno = EINVAL; + goto unlock; + } - local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR); - if (!local) { - op_errno = ENOMEM; + /* As DHT will not know details of its child xlators + * we need to parse this twice to get the count first + * and allocate memory later. + */ + count = 0; + index = conf->local_subvols_cnt; + + uuid_list_copy = gf_strdup(uuid_list); + + for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; + uuid_str = next_uuid_str) { + next_uuid_str = strtok_r(NULL, " ", &saveptr); + if (gf_uuid_parse(uuid_str, node_uuid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR, + "Failed to parse uuid" + " for %s", + prev->name); + local->op_ret = -1; + local->op_errno = EINVAL; + goto unlock; + } - goto err; + count++; + if (gf_uuid_compare(node_uuid, conf->defrag->node_uuid)) { + gf_msg_debug(this->name, 0, + "subvol %s does not" + "belong to this node", + prev->name); + } else { + /* handle multiple bricks of the same replica + * on the same node */ + if (found) + continue; + conf->local_subvols[(conf->local_subvols_cnt)++] = prev; + found = 1; + gf_msg_debug(this->name, 0, + "subvol %s belongs to" + " this node", + prev->name); + } } - layout = local->layout; - if (!layout) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LAYOUT_NULL, - "Layout is NULL"); - op_errno = ENOENT; - goto err; + if (!found) { + local->op_ret = 0; + goto unlock; } - /* skip over code which is irrelevant without a valid key */ - if (!key) - goto no_key; + conf->local_nodeuuids[index].count = count; + conf->local_nodeuuids[index].elements = GF_CALLOC( + count, sizeof(nodeuuid_info_t), 1); - local->key = gf_strdup (key); - if (!local->key) { - op_errno = ENOMEM; - goto err; - } + /* The node-uuids are guaranteed to be returned in the same + * order as the bricks + * A null node-uuid is returned for a brick that is down. 
+ */ - if (strncmp (key, conf->mds_xattr_key, strlen(key)) == 0) { - op_errno = ENOTSUP; - goto err; - } + saveptr = NULL; + i = 0; - /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */ - if (!DHT_IS_DIR(layout)) - goto no_dht_is_dir; + for (uuid_str = strtok_r(uuid_list_copy, " ", &saveptr); uuid_str; + uuid_str = next_uuid_str) { + next_uuid_str = strtok_r(NULL, " ", &saveptr); + tmp_ptr = &(conf->local_nodeuuids[index].elements[i]); + gf_uuid_parse(uuid_str, tmp_ptr->uuid); - if ((strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY, - SLEN (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) - && DHT_IS_DIR(layout)) { - dht_getxattr_get_real_filename (frame, this, loc, key, xdata); - return 0; + if (!gf_uuid_compare(tmp_ptr->uuid, conf->defrag->node_uuid)) { + tmp_ptr->info = REBAL_NODEUUID_MINE; + } + i++; + tmp_ptr = NULL; } + } - if (!strcmp (key, GF_REBAL_FIND_LOCAL_SUBVOL)) { - ret = gf_asprintf (&node_uuid_key, "%s", - GF_XATTR_LIST_NODE_UUIDS_KEY); - if (ret == -1 || !node_uuid_key) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_NO_MEMORY, - "Failed to copy node uuid key"); - op_errno = ENOMEM; - goto err; - } - (void) snprintf (local->xsel, sizeof (local->xsel), "%s", - node_uuid_key); - cnt = local->call_cnt = conf->subvolume_cnt; - for (i = 0; i < cnt; i++) { - STACK_WIND_COOKIE (frame, dht_find_local_subvol_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->getxattr, - loc, node_uuid_key, xdata); - } - if (node_uuid_key) - GF_FREE (node_uuid_key); - return 0; - } + local->op_ret = 0; +unlock: + UNLOCK(&frame->lock); - if (!strcmp (key, GF_REBAL_OLD_FIND_LOCAL_SUBVOL)) { - ret = gf_asprintf (&node_uuid_key, "%s", - GF_XATTR_NODE_UUID_KEY); - if (ret == -1 || !node_uuid_key) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_NO_MEMORY, - "Failed to copy node uuid key"); - op_errno = ENOMEM; - goto err; - } - (void) snprintf (local->xsel, sizeof (local->xsel), "%s", - node_uuid_key); - cnt = local->call_cnt = conf->subvolume_cnt; - for (i = 0; i < cnt; i++) { - STACK_WIND_COOKIE (frame, dht_find_local_subvol_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->getxattr, - loc, node_uuid_key, xdata); - } - if (node_uuid_key) - GF_FREE (node_uuid_key); - return 0; - } + if (!is_last_call(this_call_cnt)) + goto out; - /* for file use cached subvolume (obviously!): see if {} - * below - * for directory: - * wind to all subvolumes and exclude subvolumes which - * return ENOTCONN (in callback) - * - * NOTE: Don't trust inode here, as that may not be valid - * (until inode_link() happens) - */ + if (local->op_ret == -1) { + goto unwind; + } - if (XATTR_IS_PATHINFO (key) - || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0) - || (strcmp (key, GF_XATTR_LIST_NODE_UUIDS_KEY) == 0)) { - (void) snprintf (local->xsel, sizeof (local->xsel), "%s", key); - cnt = local->call_cnt = layout->cnt; - for (i = 0; i < cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND (frame, dht_vgetxattr_dir_cbk, - subvol, subvol->fops->getxattr, - loc, key, xdata); - } - return 0; - } + DHT_STACK_UNWIND(getxattr, frame, 0, 0, xattr, xdata); + goto out; -no_dht_is_dir: - /* node-uuid or pathinfo for files */ - if (XATTR_IS_PATHINFO (key) - || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)) { - cached_subvol = local->cached_subvol; - (void) snprintf (local->xsel, sizeof (local->xsel), "%s", key); - local->call_cnt = 1; - STACK_WIND_COOKIE (frame, dht_vgetxattr_cbk, cached_subvol, - cached_subvol, cached_subvol->fops->getxattr, - loc, key, xdata); +unwind: - return 0; 
- } + GF_FREE(conf->local_nodeuuids[index].elements); + conf->local_nodeuuids[index].elements = NULL; - if (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0) { + DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, xdata); +out: + GF_FREE(uuid_list_copy); + return 0; +} - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (!hashed_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get hashed subvol for %s", - loc->path); - op_errno = EINVAL; - goto err; - } +int +dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + int ret = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + dict_t *dict = NULL; - cached_subvol = dht_subvol_get_cached (this, loc->inode); - if (!cached_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_CACHED_SUBVOL_GET_FAILED, - "Failed to get cached subvol for %s", - loc->path); - op_errno = EINVAL; - goto err; - } + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(frame->local, out); - if (hashed_subvol == cached_subvol) { - op_errno = ENODATA; - goto err; - } + local = frame->local; - STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol, - hashed_subvol->fops->getxattr, loc, - GF_XATTR_PATHINFO_KEY, xdata); - return 0; - } + LOCK(&frame->lock); + { + this_call_cnt = --local->call_cnt; + if (op_ret < 0) { + if (op_errno != ENOTCONN) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir"); + local->op_ret = -1; + local->op_errno = op_errno; + } - if (dht_is_debug_xattr_key (dht_dbg_vxattrs, (char *)key) >= 0) { - dht_handle_debug_getxattr (frame, this, loc, key); - return 0; + goto unlock; } -no_key: - if (cluster_handle_marker_getxattr (frame, loc, key, conf->vol_uuid, - dht_getxattr_unwind, - dht_marker_populate_args) == 0) - return 0; + ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DICT_SET_FAILED, + "alloc or fill failure"); + } +unlock: + UNLOCK(&frame->lock); - if (DHT_IS_DIR(layout)) { - local->call_cnt = conf->subvolume_cnt; - cnt = conf->subvolume_cnt; - ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol); - if (!mds_subvol) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Cannot determine MDS, fetching xattr %s randomly" - " from a subvol for path %s ", key, loc->path); - } else { - /* TODO need to handle it, As of now we are - choosing availability instead of chossing - consistencty, in case of mds_subvol is - down winding a getxattr call on other subvol - and return xattr - */ - local->mds_subvol = mds_subvol; - for (i = 0; i < cnt; i++) { - if (conf->subvolumes[i] == mds_subvol) { - if (!conf->subvolume_status[i]) { - gf_msg (this->name, - GF_LOG_INFO, 0, - DHT_MSG_HASHED_SUBVOL_DOWN, - "MDS %s is down for path" - " path %s so fetching xattr " - "%s randomly from a subvol ", - local->mds_subvol->name, - loc->path, key); - ret = 1; - } - } - } - } + if (!is_last_call(this_call_cnt)) + goto out; - if (!ret && key && local->mds_subvol && dht_match_xattr (key)) { - STACK_WIND (frame, dht_mds_getxattr_cbk, - local->mds_subvol, - local->mds_subvol->fops->getxattr, - loc, key, xdata); + /* -- last call: do patch ups -- */ - return 0; - } - } else { - cnt = local->call_cnt = 1; - } + if (local->op_ret == -1) { + goto unwind; + } - for (i = 0; i < cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND (frame, dht_getxattr_cbk, - subvol, 
subvol->fops->getxattr, - loc, key, xdata); - } - return 0; + ret = dht_vgetxattr_fill_and_set(local, &dict, this, _gf_true); + if (ret) + goto unwind; -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); + DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata); + goto cleanup; - return 0; +unwind: + DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL); +cleanup: + if (dict) + dict_unref(dict); +out: + return 0; } -#undef DHT_IS_DIR int -dht_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *key, dict_t *xdata) +dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xattr, dict_t *xdata) { - xlator_t *subvol = NULL; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int op_errno = -1; - int i = 0; - int cnt = 0; - xlator_t *mds_subvol = NULL; - int ret = -1; - dht_conf_t *conf = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; + dht_local_t *local = NULL; + int ret = 0; + dict_t *dict = NULL; + xlator_t *prev = NULL; + gf_boolean_t flag = _gf_true; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - VALIDATE_OR_GOTO (this->private, err); + local = frame->local; + prev = cookie; - conf = this->private; + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, + "vgetxattr: Subvolume %s returned -1", prev->name); + goto unwind; + } - local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR); - if (!local) { - op_errno = ENOMEM; + ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY, + "Allocation or fill failure"); + goto unwind; + } - goto err; - } + flag = (local->layout->cnt > 1) ? 
_gf_true : _gf_false; - layout = local->layout; - if (!layout) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LAYOUT_NULL, - "Layout is NULL"); - op_errno = ENOENT; - goto err; - } + ret = dht_vgetxattr_fill_and_set(local, &dict, this, flag); + if (ret) + goto unwind; - if (key) { - local->key = gf_strdup (key); - if (!local->key) { - op_errno = ENOMEM; - goto err; - } - } + DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata); + goto cleanup; - if (fd->inode) - gf_uuid_unparse(fd->inode->gfid, gfid); +unwind: + DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL); +cleanup: + if (dict) + dict_unref(dict); - if ((fd->inode->ia_type == IA_IFDIR) - && key - && (strncmp (key, GF_XATTR_LOCKINFO_KEY, - SLEN (GF_XATTR_LOCKINFO_KEY)) != 0)) { - local->call_cnt = conf->subvolume_cnt; - cnt = conf->subvolume_cnt; - ret = dht_inode_ctx_mdsvol_get (fd->inode, this, &mds_subvol); + return 0; +} - if (!mds_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "cannot determine MDS, fetching xattr %s " - " randomly from a subvol for gfid %s ", - key, gfid); - } else { - /* TODO need to handle it, As of now we are - choosing availability instead of chossing - consistencty, in case of hashed_subvol is - down winding a getxattr call on other subvol - and return xattr - */ - local->mds_subvol = mds_subvol; - for (i = 0; i < cnt; i++) { - if (conf->subvolumes[i] == mds_subvol) { - if (!conf->subvolume_status[i]) { - gf_msg (this->name, - GF_LOG_WARNING, 0, - DHT_MSG_HASHED_SUBVOL_DOWN, - "MDS subvolume %s is down" - " for gfid %s so fetching xattr " - " %s randomly from a subvol ", - local->mds_subvol->name, - gfid, key); - ret = 1; - } - } - } - } +int +dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) +{ + int ret = 0; + char *value = NULL; - if (!ret && key && local->mds_subvol && - dht_match_xattr (key)) { - STACK_WIND (frame, dht_mds_getxattr_cbk, - local->mds_subvol, - local->mds_subvol->fops->fgetxattr, - fd, key, NULL); + if (op_ret != -1) { + ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value); + if (!ret) { + ret = dict_set_str(xattr, GF_XATTR_LINKINFO_KEY, value); + if (!ret) + gf_msg_trace(this->name, 0, "failed to set linkinfo"); + } + } - return 0; - } + DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); - } else { - cnt = local->call_cnt = 1; - } + return 0; +} +int +dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; - for (i = 0; i < cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND (frame, dht_getxattr_cbk, - subvol, subvol->fops->fgetxattr, - fd, key, NULL); - } - return 0; + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(frame->local, out); + VALIDATE_OR_GOTO(this->private, out); -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL); + conf = this->private; + local = frame->local; - return 0; + if (!xattr || (op_ret == -1)) { + local->op_ret = op_ret; + goto out; + } + if (dict_get(xattr, conf->xattr_name)) { + dict_del(xattr, conf->xattr_name); + } + local->op_ret = 0; + + if (!local->xattr) { + local->xattr = dict_copy_with_ref(xattr, NULL); + } + +out: + DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr, + xdata); + return 0; } int -dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xattr, dict_t *xdata) { - int ret = -1; - dht_local_t *local = NULL; - xlator_t *prev = NULL; - struct iatt *stbuf = NULL; - inode_t *inode = NULL; - xlator_t *subvol1 = NULL, *subvol2 = NULL; + int this_call_cnt = 0; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; - local = frame->local; - prev = cookie; + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(frame->local, out); + VALIDATE_OR_GOTO(this->private, out); - local->op_errno = op_errno; + conf = this->private; + local = frame->local; - if ((local->fop == GF_FOP_FSETXATTR) && - op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; + LOCK(&frame->lock); + { + if (!xattr || (op_ret == -1)) { + local->op_ret = op_ret; + goto unlock; } - if ((op_ret == -1) && !dht_inode_missing (op_errno)) { - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1.", - prev->name); - goto out; + if (dict_get(xattr, conf->xattr_name)) { + dict_del(xattr, conf->xattr_name); } - if (local->call_cnt != 1) - goto out; + if (dict_get(xattr, conf->mds_xattr_key)) { + dict_del(xattr, conf->mds_xattr_key); + } - ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, (void **) &stbuf); + /* filter out following two xattrs that need not + * be visible on the mount point for geo-rep - + * trusted.tier.fix.layout.complete and + * trusted.tier.tier-dht.commithash + */ - if ((!op_ret) && !stbuf) { - goto out; + if (dict_get(xattr, conf->commithash_xattr_name)) { + dict_del(xattr, conf->commithash_xattr_name); } - local->op_ret = op_ret; - local->rebalance.target_op_fn = dht_setxattr2; - if (xdata) - local->rebalance.xdata = dict_ref (xdata); - - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; + if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) { + dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); } - /* Phase 1 of migration */ - if (IS_DHT_MIGRATION_PHASE1 (stbuf)) { - inode = (local->fd) ? 
local->fd->inode : local->loc.inode; + if (frame->root->pid >= 0) { + GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); + GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); + } - ret = dht_inode_ctx_get_mig_info (this, inode, - &subvol1, &subvol2); - if (!dht_mig_info_is_invalid (local->cached_subvol, - subvol1, subvol2)) { - dht_setxattr2 (this, subvol2, frame, 0); - return 0; - } + local->op_ret = 0; - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; + if (!local->xattr) { + local->xattr = dict_copy_with_ref(xattr, NULL); + } else { + dht_aggregate_xattr(local->xattr, xattr); } + } +unlock: + UNLOCK(&frame->lock); + this_call_cnt = dht_frame_return(frame); out: - - if (local->fop == GF_FOP_SETXATTR) { - DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); - } else { - DHT_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); + if (is_last_call(this_call_cnt)) { + /* If we have a valid xattr received from any one of the + * subvolume, let's return it */ + if (local->xattr) { + local->op_ret = 0; } - return 0; + DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr, + NULL); + } + return 0; } -/* Function is call by dict_foreach_fnmatch if key is match with - user.* and set boolean flag to true -*/ -static int -dht_is_user_xattr (dict_t *this, char *key, data_t *value, void *data) +int32_t +dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict, + dict_t *xdata) { - gf_boolean_t *user_xattr_found = data; - *user_xattr_found = _gf_true; - return 0; + DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } - -/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory -*/ int -dht_dir_common_set_remove_xattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - fd_t *fd, dict_t *xattr, int flags, dict_t *xdata, - int *op_errno) - -{ - dict_t *xattrop = NULL; - int32_t subone[1] = {-1}; - gf_boolean_t uxattr_key_found = _gf_false; - xlator_t *mds_subvol = NULL; - xlator_t *travvol = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - int i = 0; - int call_cnt = 0; - dht_local_t *local = NULL; - char gfid_local[GF_UUID_BUF_SIZE] = {0}; - - conf = this->private; - local = frame->local; - call_cnt = conf->subvolume_cnt; - local->flags = flags; - - if (!gf_uuid_is_null (local->gfid)) { - gf_uuid_unparse(local->gfid, gfid_local); - } +dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) +{ + int this_call_cnt = 0; + dht_local_t *local = NULL; - if ((local->fop == GF_FOP_SETXATTR) || - (local->fop == GF_FOP_FSETXATTR)) { - /* Check if any user xattr present in xattr - */ - dict_foreach_fnmatch (xattr, "user*", dht_is_user_xattr, - &uxattr_key_found); + local = frame->local; - /* Check if any custom key xattr present in dict xattr - and start index from 1 because user xattr already - checked in previous line - */ - for (i = 1; xattrs_to_heal[i]; i++) - if (dict_get (xattr, xattrs_to_heal[i])) - uxattr_key_found = _gf_true; + LOCK(&frame->lock); + { + if (local->op_errno == ENODATA || local->op_errno == EOPNOTSUPP) { + /* Nothing to do here, we have already found + * a subvol which does not have the get_real_filename + * optimization. If condition is for simple logic. 
+ */ + goto unlock; } - if ((local->fop == GF_FOP_REMOVEXATTR) || - (local->fop == GF_FOP_FREMOVEXATTR)) { - /* Check if any custom key xattr present in local->key - */ - for (i = 0; xattrs_to_heal[i]; i++) - if (strstr (local->key, xattrs_to_heal[i])) - uxattr_key_found = _gf_true; - } + if (op_ret == -1) { + if (op_errno == ENODATA || op_errno == EOPNOTSUPP) { + /* This subvol does not have the optimization. + * Better let the user know we don't support it. + * Remove previous results if any. + */ - /* If there is no custom key xattr present or gfid is root - or call_cnt is 1 then wind a (f)setxattr call on all subvols - */ - if (!uxattr_key_found || __is_root_gfid (local->gfid) || call_cnt == 1) { - for (i = 0; i < conf->subvolume_cnt; i++) { - travvol = conf->subvolumes[i]; - if ((local->fop == GF_FOP_SETXATTR) || - (local->fop == GF_FOP_FSETXATTR)) { - if (fd) { - STACK_WIND_COOKIE (frame, dht_err_cbk, - travvol, travvol, - travvol->fops->fsetxattr, - fd, xattr, flags, xdata); - } else { - STACK_WIND_COOKIE (frame, dht_err_cbk, - travvol, travvol, - travvol->fops->setxattr, - loc, xattr, flags, xdata); - } - } - - if ((local->fop == GF_FOP_REMOVEXATTR) || - (local->fop == GF_FOP_FREMOVEXATTR)) { - if (fd) { - STACK_WIND_COOKIE (frame, dht_err_cbk, - travvol, travvol, - travvol->fops->fremovexattr, - fd, local->key, local->xattr_req); - } else { - STACK_WIND_COOKIE (frame, dht_err_cbk, - travvol, travvol, - travvol->fops->removexattr, - loc, local->key, local->xattr_req); - } - } + if (local->xattr) { + dict_unref(local->xattr); + local->xattr = NULL; } - return 0; - } - - /* Calculate hash subvol based on inode and parent inode - */ - if (fd) { - ret = dht_inode_ctx_mdsvol_get (fd->inode, this, &mds_subvol); - } else { - ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol); - } - if (ret || !mds_subvol) { - if (fd) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get mds subvol for fd %p" - "gfid is %s ", fd, gfid_local); - } else { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get mds subvol for path %s" - "gfid is %s ", loc->path, gfid_local); + if (local->xattr_req) { + dict_unref(local->xattr_req); + local->xattr_req = NULL; } - (*op_errno) = ENOENT; - goto err; - } - local->mds_subvol = mds_subvol; + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_UPGRADE_BRICKS, + "At least " + "one of the bricks does not support " + "this operation. Please upgrade all " + "bricks."); + goto unlock; + } - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == mds_subvol) { - if (!conf->subvolume_status[i]) { - gf_msg (this->name, GF_LOG_WARNING, - 0, DHT_MSG_HASHED_SUBVOL_DOWN, - "MDS subvol is down for path " - " %s gfid is %s Unable to set xattr " , - local->loc.path, gfid_local); - (*op_errno) = ENOTCONN; - goto err; - } - } - } + if (op_errno == ENOENT) { + /* Do nothing, our defaults are set to this. + */ + goto unlock; + } - if (uxattr_key_found) { - xattrop = dict_new (); - if (!xattrop) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, - 0, "dictionary creation failed for path %s " - "for gfid is %s ", local->loc.path, gfid_local); - (*op_errno) = ENOMEM; - goto err; - } - local->xattr = dict_ref (xattr); - /* Subtract current MDS xattr value to -1 , value of MDS - xattr represents no. of times xattr modification failed - on non MDS subvols. 
- */ - ret = dht_dict_set_array (xattrop, conf->mds_xattr_key, subone, 1); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "dictionary set array failed for path %s " - "for gfid is %s ", local->loc.path, gfid_local); - if (xattrop) - dict_unref (xattrop); - (*op_errno) = ret; - goto err; - } - /* Wind a xattrop call to use ref counting approach - update mds xattr to -1 before update xattr on - hashed subvol and update mds xattr to +1 after update - xattr on all non hashed subvol - */ - if (fd) { - STACK_WIND (frame, dht_xattrop_mds_cbk, - local->mds_subvol, - local->mds_subvol->fops->fxattrop, - fd, GF_XATTROP_ADD_ARRAY, xattrop, NULL); - } else { - STACK_WIND (frame, dht_xattrop_mds_cbk, - local->mds_subvol, - local->mds_subvol->fops->xattrop, - loc, GF_XATTROP_ADD_ARRAY, - xattrop, NULL); - } - if (xattrop) - dict_unref (xattrop); + /* This is a place holder for every other error + * case. I am not sure of how to interpret + * ENOTCONN etc. As of now, choosing to ignore + * down subvol and return a good result(if any) + * from other subvol. + */ + gf_msg(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_GET_XATTR_FAILED, "Failed to get real filename."); + goto unlock; + } + + /* This subvol has the required file. + * There could be other subvols which have returned + * success already, choosing to return the latest good + * result. + */ + if (local->xattr) + dict_unref(local->xattr); + local->xattr = dict_ref(xattr); + + if (local->xattr_req) { + dict_unref(local->xattr_req); + local->xattr_req = NULL; } + if (xdata) + local->xattr_req = dict_ref(xdata); - return 0; -err: - return -1; -} + local->op_ret = op_ret; + local->op_errno = 0; + gf_msg_debug(this->name, 0, + "Found a matching " + "file."); + } +unlock: + UNLOCK(&frame->lock); + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + DHT_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, + local->xattr, local->xattr_req); + } + + return 0; +} int -dht_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xattr, int flags, dict_t *xdata) +dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *xdata) { - xlator_t *subvol = NULL; - dht_local_t *local = NULL; - int op_errno = EINVAL; - dht_conf_t *conf = NULL; - dht_layout_t *layout = NULL; - int ret = -1; - int call_cnt = 0; + dht_local_t *local = NULL; + int i = 0; + dht_layout_t *layout = NULL; + int cnt = 0; + xlator_t *subvol = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - VALIDATE_OR_GOTO (this->private, err); + local = frame->local; + layout = local->layout; - conf = this->private; + cnt = local->call_cnt = layout->cnt; - if (!conf->defrag) - GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, - op_errno, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local->op_ret = -1; + local->op_errno = ENOENT; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND(frame, dht_getxattr_get_real_filename_cbk, subvol, + subvol->fops->getxattr, loc, key, xdata); + } - layout = local->layout; - if (!layout) { - gf_msg_debug (this->name, 0, - "no layout for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + 
return 0; +} - local->xattr_req = xdata ? dict_ref (xdata) : dict_new (); - local->call_cnt = call_cnt = layout->cnt; +int +dht_marker_populate_args(call_frame_t *frame, int type, int *gauge, + xlator_t **subvols) +{ + dht_local_t *local = NULL; + int i = 0; + dht_layout_t *layout = NULL; - if (IA_ISDIR (fd->inode->ia_type)) { - local->hashed_subvol = NULL; - ret = dht_dir_common_set_remove_xattr (frame, this, NULL, fd, - xattr, flags, xdata, &op_errno); - if (ret) - goto err; - } else { + local = frame->local; + layout = local->layout; - local->call_cnt = 1; - local->rebalance.xattr = dict_ref (xattr); - local->rebalance.flags = flags; + for (i = 0; i < layout->cnt; i++) + subvols[i] = layout->list[i].xlator; - ret = dict_set_int8 (local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); - if (ret) { - gf_msg_debug (this->name, 0, - "Failed to set dictionary key %s for fd=%p", - DHT_IATT_IN_XDATA_KEY, fd); - } + return layout->cnt; +} - STACK_WIND_COOKIE (frame, dht_file_setxattr_cbk, subvol, - subvol, subvol->fops->fsetxattr, fd, xattr, - flags, local->xattr_req); - } - return 0; +int +dht_is_debug_xattr_key(char **array, char *key) +{ + int i = 0; -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); + for (i = 0; array[i]; i++) { + if (fnmatch(array[i], key, FNM_NOESCAPE) == 0) + return i; + } - return 0; + return -1; } +/* Note we already have frame->local initialised here*/ int -dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, - dict_t *xdata) +dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key) { - int i = -1; - int ret = -1; - char *value = NULL; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *prev = NULL; - int this_call_cnt = 0; + dht_local_t *local = NULL; + int ret = -1; + int op_errno = ENODATA; + char *value = NULL; + loc_t file_loc = {0}; + const char *name = NULL; - local = frame->local; - prev = cookie; - conf = this->private; + local = frame->local; + if (!key) { + op_errno = EINVAL; + goto out; + } - if (op_ret == -1) - goto out; + if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) { + goto out; + } + local->xattr = dict_new(); + if (!local->xattr) { + op_errno = ENOMEM; + goto out; + } - ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value); - if (ret) - goto out; + if (strncmp(key, DHT_DBG_HASHED_SUBVOL_KEY, + SLEN(DHT_DBG_HASHED_SUBVOL_KEY)) == 0) { + name = key + strlen(DHT_DBG_HASHED_SUBVOL_KEY); + if (strlen(name) == 0) { + op_errno = EINVAL; + goto out; + } - if (!strcmp (value, local->key)) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == prev) - conf->decommissioned_bricks[i] = prev; - } + ret = dht_build_child_loc(this, &file_loc, loc, (char *)name); + if (ret) { + op_errno = ENOMEM; + goto out; + } + + local->hashed_subvol = dht_subvol_get_hashed(this, &file_loc); + if (local->hashed_subvol == NULL) { + op_errno = ENODATA; + goto out; + } + + value = gf_strdup(local->hashed_subvol->name); + if (!value) { + op_errno = ENOMEM; + goto out; + } + + ret = dict_set_dynstr(local->xattr, (char *)key, value); + if (ret < 0) { + op_errno = -ret; + ret = -1; + goto out; + } + ret = 0; + goto out; + } + +out: + loc_wipe(&file_loc); + DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL); + return 0; +} + +int +dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + dict_t *xdata) +#define DHT_IS_DIR(layout) 
(layout->cnt > 1) +{ + xlator_t *subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *mds_subvol = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int op_errno = -1; + int i = 0; + int cnt = 0; + char *node_uuid_key = NULL; + int ret = -1; + + GF_CHECK_XATTR_KEY_AND_GOTO(key, IO_THREADS_QUEUE_SIZE_KEY, op_errno, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + local = dht_local_init(frame, loc, NULL, GF_FOP_GETXATTR); + if (!local) { + op_errno = ENOMEM; + + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL, + "Layout is NULL"); + op_errno = ENOENT; + goto err; + } + + /* skip over code which is irrelevant without a valid key */ + if (!key) + goto no_key; + + local->key = gf_strdup(key); + if (!local->key) { + op_errno = ENOMEM; + goto err; + } + + if (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0) { + op_errno = ENOTSUP; + goto err; + } + + /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */ + if (!DHT_IS_DIR(layout)) + goto no_dht_is_dir; + + if ((strncmp(key, GF_XATTR_GET_REAL_FILENAME_KEY, + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) && + DHT_IS_DIR(layout)) { + dht_getxattr_get_real_filename(frame, this, loc, key, xdata); + return 0; + } + + if (!strcmp(key, GF_REBAL_FIND_LOCAL_SUBVOL)) { + ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_LIST_NODE_UUIDS_KEY); + if (ret == -1 || !node_uuid_key) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY, + "Failed to copy node uuid key"); + op_errno = ENOMEM; + goto err; + } + (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key); + cnt = local->call_cnt = conf->subvolume_cnt; + for (i = 0; i < cnt; i++) { + STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->getxattr, loc, + node_uuid_key, xdata); } + if (node_uuid_key) + GF_FREE(node_uuid_key); + return 0; + } -out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL); + if (!strcmp(key, GF_REBAL_OLD_FIND_LOCAL_SUBVOL)) { + ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_NODE_UUID_KEY); + if (ret == -1 || !node_uuid_key) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY, + "Failed to copy node uuid key"); + op_errno = ENOMEM; + goto err; + } + (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key); + cnt = local->call_cnt = conf->subvolume_cnt; + for (i = 0; i < cnt; i++) { + STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->getxattr, loc, + node_uuid_key, xdata); + } + if (node_uuid_key) + GF_FREE(node_uuid_key); + return 0; + } + + /* for file use cached subvolume (obviously!): see if {} + * below + * for directory: + * wind to all subvolumes and exclude subvolumes which + * return ENOTCONN (in callback) + * + * NOTE: Don't trust inode here, as that may not be valid + * (until inode_link() happens) + */ + + if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0) || + (strcmp(key, GF_XATTR_LIST_NODE_UUIDS_KEY) == 0)) { + (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key); + cnt = local->call_cnt = layout->cnt; + for (i = 0; i < cnt; i++) { + subvol = 
layout->list[i].xlator; + STACK_WIND(frame, dht_vgetxattr_dir_cbk, subvol, + subvol->fops->getxattr, loc, key, xdata); } return 0; + } -} - +no_dht_is_dir: + /* node-uuid or pathinfo for files */ + if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0)) { + cached_subvol = local->cached_subvol; + (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key); + local->call_cnt = 1; + STACK_WIND_COOKIE(frame, dht_vgetxattr_cbk, cached_subvol, + cached_subvol, cached_subvol->fops->getxattr, loc, + key, xdata); -int -dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) -{ - dht_local_t *local = NULL; - int op_errno = EINVAL; + return 0; + } - if (!frame || !frame->local) - goto err; + if (strcmp(key, GF_XATTR_LINKINFO_KEY) == 0) { + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (!hashed_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get hashed subvol for %s", loc->path); + op_errno = EINVAL; + goto err; + } - local = frame->local; - op_errno = local->op_errno; + cached_subvol = dht_subvol_get_cached(this, loc->inode); + if (!cached_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_CACHED_SUBVOL_GET_FAILED, + "Failed to get cached subvol for %s", loc->path); + op_errno = EINVAL; + goto err; + } - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, local->rebalance.xdata); - return 0; + if (hashed_subvol == cached_subvol) { + op_errno = ENODATA; + goto err; } - if (subvol == NULL) - goto err; + STACK_WIND(frame, dht_linkinfo_getxattr_cbk, hashed_subvol, + hashed_subvol->fops->getxattr, loc, GF_XATTR_PATHINFO_KEY, + xdata); + return 0; + } + if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) >= 0) { + dht_handle_debug_getxattr(frame, this, loc, key); + return 0; + } - local->call_cnt = 2; /* This is the second attempt */ +no_key: + if (cluster_handle_marker_getxattr(frame, loc, key, conf->vol_uuid, + dht_getxattr_unwind, + dht_marker_populate_args) == 0) + return 0; - if (local->fop == GF_FOP_SETXATTR) { - STACK_WIND_COOKIE (frame, dht_file_setxattr_cbk, subvol, - subvol, subvol->fops->setxattr, &local->loc, - local->rebalance.xattr, - local->rebalance.flags, local->xattr_req); + if (DHT_IS_DIR(layout)) { + local->call_cnt = conf->subvolume_cnt; + cnt = conf->subvolume_cnt; + ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol); + if (!mds_subvol) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Cannot determine MDS, fetching xattr %s randomly" + " from a subvol for path %s ", + key, loc->path); } else { - STACK_WIND_COOKIE (frame, dht_file_setxattr_cbk, subvol, - subvol, subvol->fops->fsetxattr, local->fd, - local->rebalance.xattr, - local->rebalance.flags, local->xattr_req); + /* TODO need to handle it, As of now we are + choosing availability instead of chossing + consistencty, in case of mds_subvol is + down winding a getxattr call on other subvol + and return xattr + */ + local->mds_subvol = mds_subvol; + for (i = 0; i < cnt; i++) { + if (conf->subvolumes[i] == mds_subvol) { + if (!conf->subvolume_status[i]) { + gf_msg(this->name, GF_LOG_INFO, 0, + DHT_MSG_HASHED_SUBVOL_DOWN, + "MDS %s is down for path" + " path %s so fetching xattr " + "%s randomly from a subvol ", + local->mds_subvol->name, loc->path, key); + ret = 1; + } + } + } } - return 0; + if 
(!ret && key && local->mds_subvol && dht_match_xattr(key)) { + STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol, + local->mds_subvol->fops->getxattr, loc, key, xdata); + + return 0; + } + } else { + cnt = local->call_cnt = 1; + } + + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->getxattr, loc, + key, xdata); + } + return 0; err: - DHT_STACK_UNWIND (setxattr, frame, (local ? local->op_ret : -1), - op_errno, NULL); - return 0; -} + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL); -int -dht_nuke_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); - return 0; + return 0; } +#undef DHT_IS_DIR int -dht_nuke_dir (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp) -{ - if (!IA_ISDIR(loc->inode->ia_type)) { - DHT_STACK_UNWIND (setxattr, frame, -1, ENOTSUP, NULL); - return 0; +dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + dict_t *xdata) +{ + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int op_errno = -1; + int i = 0; + int cnt = 0; + xlator_t *mds_subvol = NULL; + int ret = -1; + dht_conf_t *conf = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + local = dht_local_init(frame, NULL, fd, GF_FOP_FGETXATTR); + if (!local) { + op_errno = ENOMEM; + + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL, + "Layout is NULL"); + op_errno = ENOENT; + goto err; + } + + if (key) { + local->key = gf_strdup(key); + if (!local->key) { + op_errno = ENOMEM; + goto err; } + } - /* Setxattr didn't need the parent, but rmdir does. 
*/ - loc->parent = inode_parent (loc->inode, NULL, NULL); - if (!loc->parent) { - DHT_STACK_UNWIND (setxattr, frame, -1, ENOENT, NULL); - return 0; - } - gf_uuid_copy (loc->pargfid, loc->parent->gfid); + if (fd->inode) + gf_uuid_unparse(fd->inode->gfid, gfid); - if (!loc->name && loc->path) { - loc->name = strrchr (loc->path, '/'); - if (loc->name) { - ++(loc->name); + if ((fd->inode->ia_type == IA_IFDIR) && key && + (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) != + 0)) { + local->call_cnt = conf->subvolume_cnt; + cnt = conf->subvolume_cnt; + ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol); + + if (!mds_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "cannot determine MDS, fetching xattr %s " + " randomly from a subvol for gfid %s ", + key, gfid); + } else { + /* TODO need to handle it, As of now we are + choosing availability instead of chossing + consistencty, in case of hashed_subvol is + down winding a getxattr call on other subvol + and return xattr + */ + local->mds_subvol = mds_subvol; + for (i = 0; i < cnt; i++) { + if (conf->subvolumes[i] == mds_subvol) { + if (!conf->subvolume_status[i]) { + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_HASHED_SUBVOL_DOWN, + "MDS subvolume %s is down" + " for gfid %s so fetching xattr " + " %s randomly from a subvol ", + local->mds_subvol->name, gfid, key); + ret = 1; + } } + } } - /* - * We do this instead of calling dht_rmdir_do directly for two reasons. - * The first is that we want to reuse all of the initialization that - * dht_rmdir does, so if it ever changes we'll just follow along. The - * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't - * obscure the fact that we came in via this path instead of a genuine - * rmdir. That makes debugging just a tiny bit easier. - */ - STACK_WIND (frame, dht_nuke_dir_cbk, this, this->fops->rmdir, - loc, 1, NULL); + if (!ret && key && local->mds_subvol && dht_match_xattr(key)) { + STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol, + local->mds_subvol->fops->fgetxattr, fd, key, NULL); - return 0; -} + return 0; + } + + } else { + cnt = local->call_cnt = 1; + } + + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, fd, + key, NULL); + } + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL); + return 0; +} int -dht_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr, int flags, dict_t *xdata) +dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - xlator_t *subvol = NULL; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; - dht_layout_t *layout = NULL; - int i = 0; - int op_errno = EINVAL; - int ret = -1; - data_t *tmp = NULL; - uint32_t dir_spread = 0; - char value[4096] = {0,}; - gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA; - int call_cnt = 0; - uint32_t new_hash = 0; + int ret = -1; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + struct iatt *stbuf = NULL; + inode_t *inode = NULL; + xlator_t *subvol1 = NULL, *subvol2 = NULL; + + local = frame->local; + prev = cookie; + local->op_errno = op_errno; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); + if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 && + (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, err); + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.", + prev->name); + goto out; + } - methods = &(conf->methods); + if (local->call_cnt != 1) + goto out; - /* Rebalance daemon is allowed to set internal keys */ - if (!conf->defrag) - GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, - op_errno, err); + ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); - local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR); - if (!local) { - op_errno = ENOMEM; - goto err; - } + if ((!op_ret) && !stbuf) { + goto out; + } - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", - loc->path); - op_errno = EINVAL; - goto err; - } + local->op_ret = op_ret; + local->rebalance.target_op_fn = dht_setxattr2; + if (xdata) + local->rebalance.xdata = dict_ref(xdata); - layout = local->layout; - if (!layout) { - gf_msg_debug (this->name, 0, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } - local->call_cnt = call_cnt = layout->cnt; - tmp = dict_get (xattr, conf->mds_xattr_key); - if (tmp) { - op_errno = ENOTSUP; - goto err; + /* Phase 1 of migration */ + if (IS_DHT_MIGRATION_PHASE1(stbuf)) { + inode = (local->fd) ? 
local->fd->inode : local->loc.inode; + + ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2); + if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) { + dht_setxattr2(this, subvol2, frame, 0); + return 0; } - tmp = dict_get (xattr, GF_XATTR_FILE_MIGRATE_KEY); - if (tmp) { + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } - if (IA_ISDIR (loc->inode->ia_type)) { - op_errno = ENOTSUP; - goto err; - } +out: - /* TODO: need to interpret the 'value' for more meaning - (ie, 'target' subvolume given there, etc) */ - memcpy (value, tmp->data, tmp->len); - if (strcmp (value, "force") == 0) - forced_rebalance = - GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS; + if (local->fop == GF_FOP_SETXATTR) { + DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + } else { + DHT_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); + } - if (conf->decommission_in_progress) - forced_rebalance = GF_DHT_MIGRATE_HARDLINK; + return 0; +} - if (!loc->path) { - op_errno = EINVAL; - goto err; - } +/* Function is call by dict_foreach_fnmatch if key is match with + user.* and set boolean flag to true +*/ +static int +dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data) +{ + gf_boolean_t *user_xattr_found = data; + *user_xattr_found = _gf_true; + return 0; +} - if (!local->loc.name) - local->loc.name = strrchr (local->loc.path, '/')+1; +/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory + */ +int +dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, dict_t *xattr, int flags, + dict_t *xdata, int *op_errno) + +{ + dict_t *xattrop = NULL; + int32_t subone[1] = {-1}; + gf_boolean_t uxattr_key_found = _gf_false; + xlator_t *mds_subvol = NULL; + xlator_t *travvol = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int i = 0; + int call_cnt = 0; + dht_local_t *local = NULL; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + + conf = this->private; + local = frame->local; + call_cnt = conf->subvolume_cnt; + local->flags = flags; + + if (!gf_uuid_is_null(local->gfid)) { + gf_uuid_unparse(local->gfid, gfid_local); + } - if (!local->loc.parent) - local->loc.parent = - inode_parent(local->loc.inode, NULL, NULL); + if ((local->fop == GF_FOP_SETXATTR) || (local->fop == GF_FOP_FSETXATTR)) { + /* Check if any user xattr present in xattr + */ + dict_foreach_fnmatch(xattr, "user*", dht_is_user_xattr, + &uxattr_key_found); - if ((!local->loc.name) || (!local->loc.parent)) { - op_errno = EINVAL; - goto err; + /* Check if any custom key xattr present in dict xattr + and start index from 1 because user xattr already + checked in previous line + */ + for (i = 1; xattrs_to_heal[i]; i++) + if (dict_get(xattr, xattrs_to_heal[i])) + uxattr_key_found = _gf_true; + } + + if ((local->fop == GF_FOP_REMOVEXATTR) || + (local->fop == GF_FOP_FREMOVEXATTR)) { + /* Check if any custom key xattr present in local->key + */ + for (i = 0; xattrs_to_heal[i]; i++) + if (strstr(local->key, xattrs_to_heal[i])) + uxattr_key_found = _gf_true; + } + + /* If there is no custom key xattr present or gfid is root + or call_cnt is 1 then wind a (f)setxattr call on all subvols + */ + if (!uxattr_key_found || __is_root_gfid(local->gfid) || call_cnt == 1) { + for (i = 0; i < conf->subvolume_cnt; i++) { + travvol = conf->subvolumes[i]; + if ((local->fop == GF_FOP_SETXATTR) || + (local->fop == GF_FOP_FSETXATTR)) { + if (fd) { + STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol, + travvol->fops->fsetxattr, fd, xattr, + flags, 
xdata); + } else { + STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol, + travvol->fops->setxattr, loc, xattr, + flags, xdata); } + } - if (gf_uuid_is_null (local->loc.pargfid)) - gf_uuid_copy (local->loc.pargfid, local->loc.parent->gfid); - - methods->migration_get_dst_subvol(this, local); - - if (!local->rebalance.target_node) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get hashed subvol for %s", - loc->path); - op_errno = EINVAL; - goto err; + if ((local->fop == GF_FOP_REMOVEXATTR) || + (local->fop == GF_FOP_FREMOVEXATTR)) { + if (fd) { + STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol, + travvol->fops->fremovexattr, fd, + local->key, local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol, + travvol->fops->removexattr, loc, + local->key, local->xattr_req); } + } + } - local->rebalance.from_subvol = local->cached_subvol; + return 0; + } - if (local->rebalance.target_node == local->rebalance.from_subvol) { - op_errno = EEXIST; - goto err; - } - if (local->rebalance.target_node) { - local->flags = forced_rebalance; - - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get (xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; - - ret = dht_start_rebalance_task (this, frame); - if (!ret) - return 0; - - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_REBALANCE_START_FAILED, - "%s: failed to create a new rebalance synctask", - loc->path); - } - op_errno = EINVAL; + /* Calculate hash subvol based on inode and parent inode + */ + if (fd) { + ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol); + } else { + ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol); + } + if (ret || !mds_subvol) { + if (fd) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get mds subvol for fd %p" + "gfid is %s ", + fd, gfid_local); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get mds subvol for path %s" + "gfid is %s ", + loc->path, gfid_local); + } + (*op_errno) = ENOENT; + goto err; + } + + local->mds_subvol = mds_subvol; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == mds_subvol) { + if (!conf->subvolume_status[i]) { + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_HASHED_SUBVOL_DOWN, + "MDS subvol is down for path " + " %s gfid is %s Unable to set xattr ", + local->loc.path, gfid_local); + (*op_errno) = ENOTCONN; goto err; + } + } + } + + if (uxattr_key_found) { + xattrop = dict_new(); + if (!xattrop) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, + "dictionary creation failed for path %s " + "for gfid is %s ", + local->loc.path, gfid_local); + (*op_errno) = ENOMEM; + goto err; + } + local->xattr = dict_ref(xattr); + /* Subtract current MDS xattr value to -1 , value of MDS + xattr represents no. of times xattr modification failed + on non MDS subvols. 
+ */ + ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, subone, 1); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "dictionary set array failed for path %s " + "for gfid is %s ", + local->loc.path, gfid_local); + if (xattrop) + dict_unref(xattrop); + (*op_errno) = ret; + goto err; + } + /* Wind a xattrop call to use ref counting approach + update mds xattr to -1 before update xattr on + hashed subvol and update mds xattr to +1 after update + xattr on all non hashed subvol + */ + if (fd) { + STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol, + local->mds_subvol->fops->fxattrop, fd, + GF_XATTROP_ADD_ARRAY, xattrop, NULL); + } else { + STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol, + local->mds_subvol->fops->xattrop, loc, + GF_XATTROP_ADD_ARRAY, xattrop, NULL); + } + if (xattrop) + dict_unref(xattrop); + } + + return 0; +err: + return -1; +} + +int +dht_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) +{ + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + int op_errno = EINVAL; + dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + int call_cnt = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + if (!conf->defrag) + GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_FSETXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local->xattr_req = xdata ? dict_ref(xdata) : dict_new(); + local->call_cnt = call_cnt = layout->cnt; + + if (IA_ISDIR(fd->inode->ia_type)) { + local->hashed_subvol = NULL; + ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, xattr, + flags, xdata, &op_errno); + if (ret) + goto err; + } else { + local->call_cnt = 1; + local->rebalance.xattr = dict_ref(xattr); + local->rebalance.flags = flags; + ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set dictionary key %s for fd=%p", + DHT_IATT_IN_XDATA_KEY, fd); } - tmp = dict_get (xattr, "decommission-brick"); - if (tmp) { - /* This operation should happen only on '/' */ - if (!__is_root_gfid (loc->inode->gfid)) { - op_errno = ENOTSUP; - goto err; - } + STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, + subvol->fops->fsetxattr, fd, xattr, flags, + local->xattr_req); + } + return 0; - memcpy (value, tmp->data, min (tmp->len, 4095)); - local->key = gf_strdup (value); - local->call_cnt = conf->subvolume_cnt; +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL); - for (i = 0 ; i < conf->subvolume_cnt; i++) { - /* Get the pathinfo, and then compare */ - STACK_WIND_COOKIE (frame, dht_checking_pathinfo_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->getxattr, - loc, GF_XATTR_PATHINFO_KEY, NULL); - } - return 0; - } + return 0; +} - tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY); - if (tmp) { - ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash); - if (ret == 0) { - gf_msg_debug (this->name, 0, - "updating commit hash for %s from %u to %u", - uuid_utoa(loc->gfid), - layout->commit_hash, new_hash); - layout->commit_hash = new_hash; - - ret = dht_update_commit_hash_for_layout (frame); - if (ret) { - op_errno = ENOTCONN; - goto err; - } - return ret; - } +int +dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) +{ + int i = -1; + int ret = -1; + char *value = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *prev = NULL; + int this_call_cnt = 0; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_FIX_LAYOUT_INFO, - "fixing the layout of %s", loc->path); + local = frame->local; + prev = cookie; + conf = this->private; - ret = dht_fix_directory_layout (frame, - dht_fix_layout_setxattr_cbk, - layout); - if (ret) { - op_errno = ENOTCONN; - goto err; - } - return ret; - } + if (op_ret == -1) + goto out; - tmp = dict_get (xattr, "distribute.directory-spread-count"); - if (tmp) { - /* Setxattr value is packed as 'binary', not string */ - memcpy (value, tmp->data, min (tmp->len, 4095)); - ret = gf_string2uint32 (value, &dir_spread); - if (!ret && ((dir_spread <= conf->subvolume_cnt) && - (dir_spread > 0))) { - layout->spread_cnt = dir_spread; - - ret = dht_fix_directory_layout (frame, - dht_common_setxattr_cbk, - layout); - if (ret) { - op_errno = ENOTCONN; - goto err; - } - return ret; - } - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_OPERATION_NOT_SUP, - "wrong 'directory-spread-count' value (%s)", value); - op_errno = ENOTSUP; - goto err; - } + ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value); + if (ret) + goto out; - tmp = dict_get (xattr, "glusterfs.dht.nuke"); - if (tmp) { - return dht_nuke_dir (frame, this, loc, tmp); + if (!strcmp(value, local->key)) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev) + conf->decommissioned_bricks[i] = prev; } - local->xattr_req = xdata ? dict_ref (xdata) : dict_new (); + } - if (IA_ISDIR (loc->inode->ia_type)) { - local->hashed_subvol = NULL; - ret = dht_dir_common_set_remove_xattr (frame, this, loc, NULL, - xattr, flags, xdata, &op_errno); - if (ret) - goto err; - } else { +out: + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + DHT_STACK_UNWIND(setxattr, frame, local->op_ret, ENOTSUP, NULL); + } + return 0; +} - local->rebalance.xattr = dict_ref (xattr); - local->rebalance.flags = flags; - local->call_cnt = 1; +int +dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +{ + dht_local_t *local = NULL; + int op_errno = EINVAL; - ret = dict_set_int8 (local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); + if (!frame || !frame->local) + goto err; - STACK_WIND_COOKIE (frame, dht_file_setxattr_cbk, subvol, - subvol, subvol->fops->setxattr, loc, xattr, - flags, local->xattr_req); - } + local = frame->local; + op_errno = local->op_errno; + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. 
Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + local->rebalance.xdata); return 0; + } -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + if (subvol == NULL) + goto err; - return 0; -} + local->call_cnt = 2; /* This is the second attempt */ + if (local->fop == GF_FOP_SETXATTR) { + STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, + subvol->fops->setxattr, &local->loc, + local->rebalance.xattr, local->rebalance.flags, + local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, + subvol->fops->fsetxattr, local->fd, + local->rebalance.xattr, local->rebalance.flags, + local->xattr_req); + } + return 0; +err: + DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno, + NULL); + return 0; +} int -dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int ret = -1; - dht_local_t *local = NULL; - xlator_t *prev = NULL; - struct iatt *stbuf = NULL; - inode_t *inode = NULL; - xlator_t *subvol1 = NULL, *subvol2 = NULL; - - local = frame->local; - prev = cookie; - - local->op_errno = op_errno; - - if ((local->fop == GF_FOP_FREMOVEXATTR) && - (op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int +dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp) +{ + if (!IA_ISDIR(loc->inode->ia_type)) { + DHT_STACK_UNWIND(setxattr, frame, -1, ENOTSUP, NULL); + return 0; + } + + /* Setxattr didn't need the parent, but rmdir does. */ + loc->parent = inode_parent(loc->inode, NULL, NULL); + if (!loc->parent) { + DHT_STACK_UNWIND(setxattr, frame, -1, ENOENT, NULL); + return 0; + } + gf_uuid_copy(loc->pargfid, loc->parent->gfid); + + if (!loc->name && loc->path) { + loc->name = strrchr(loc->path, '/'); + if (loc->name) { + ++(loc->name); } + } - if ((op_ret == -1) && !dht_inode_missing (op_errno)) { - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto out; - } + /* + * We do this instead of calling dht_rmdir_do directly for two reasons. + * The first is that we want to reuse all of the initialization that + * dht_rmdir does, so if it ever changes we'll just follow along. The + * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't + * obscure the fact that we came in via this path instead of a genuine + * rmdir. That makes debugging just a tiny bit easier. 
+ */ + STACK_WIND(frame, dht_nuke_dir_cbk, this, this->fops->rmdir, loc, 1, NULL); - if (local->call_cnt != 1) - goto out; + return 0; +} - ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, (void **) &stbuf); +int +dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, + int flags, dict_t *xdata) +{ + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; + dht_layout_t *layout = NULL; + int i = 0; + int op_errno = EINVAL; + int ret = -1; + data_t *tmp = NULL; + uint32_t dir_spread = 0; + char value[4096] = { + 0, + }; + gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA; + int call_cnt = 0; + uint32_t new_hash = 0; - if ((!op_ret) && !stbuf) { - goto out; - } + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); - local->op_ret = 0; + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, err); - local->rebalance.target_op_fn = dht_removexattr2; - if (xdata) - local->rebalance.xdata = dict_ref (xdata); + methods = &(conf->methods); + + /* Rebalance daemon is allowed to set internal keys */ + if (!conf->defrag) + GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err); + + local = dht_local_init(frame, loc, NULL, GF_FOP_SETXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local->call_cnt = call_cnt = layout->cnt; + tmp = dict_get(xattr, conf->mds_xattr_key); + if (tmp) { + op_errno = ENOTSUP; + goto err; + } + + tmp = dict_get(xattr, GF_XATTR_FILE_MIGRATE_KEY); + if (tmp) { + if (IA_ISDIR(loc->inode->ia_type)) { + op_errno = ENOTSUP; + goto err; + } + + /* TODO: need to interpret the 'value' for more meaning + (ie, 'target' subvolume given there, etc) */ + memcpy(value, tmp->data, tmp->len); + if (strcmp(value, "force") == 0) + forced_rebalance = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS; + + if (conf->decommission_in_progress) + forced_rebalance = GF_DHT_MIGRATE_HARDLINK; + + if (!loc->path) { + op_errno = EINVAL; + goto err; + } + + if (!local->loc.name) + local->loc.name = strrchr(local->loc.path, '/') + 1; + + if (!local->loc.parent) + local->loc.parent = inode_parent(local->loc.inode, NULL, NULL); + + if ((!local->loc.name) || (!local->loc.parent)) { + op_errno = EINVAL; + goto err; + } + + if (gf_uuid_is_null(local->loc.pargfid)) + gf_uuid_copy(local->loc.pargfid, local->loc.parent->gfid); + + methods->migration_get_dst_subvol(this, local); + + if (!local->rebalance.target_node) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get hashed subvol for %s", loc->path); + op_errno = EINVAL; + goto err; + } + + local->rebalance.from_subvol = local->cached_subvol; + + if (local->rebalance.target_node == local->rebalance.from_subvol) { + op_errno = EEXIST; + goto err; + } + if (local->rebalance.target_node) { + local->flags = forced_rebalance; + + /* Flag to suggest its a tiering migration + * The reason for this dic key-value is that + * promotions and demotions are multithreaded + * so the original frame from gf_defrag_start() + * is not carried. A new frame will be created when + * we do syncop_setxattr(). 
This does not have the + * frame->root->pid of the original frame. So we pass + * this dic key-value when we do syncop_setxattr() to do + * data migration and set the frame->root->pid to + * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before + * calling dht_start_rebalance_task() */ + tmp = dict_get(xattr, TIERING_MIGRATION_KEY); + if (tmp) + frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; + else + frame->root->pid = GF_CLIENT_PID_DEFRAG; + + ret = dht_start_rebalance_task(this, frame); + if (!ret) + return 0; - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED, + "%s: failed to create a new rebalance synctask", loc->path); } + op_errno = EINVAL; + goto err; + } - /* Phase 1 of migration */ - if (IS_DHT_MIGRATION_PHASE1 (stbuf)) { - inode = (local->fd) ? local->fd->inode : local->loc.inode; - - ret = dht_inode_ctx_get_mig_info (this, inode, - &subvol1, &subvol2); - if (!dht_mig_info_is_invalid (local->cached_subvol, - subvol1, subvol2)) { - dht_removexattr2 (this, subvol2, frame, 0); - return 0; - } - - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; + tmp = dict_get(xattr, "decommission-brick"); + if (tmp) { + /* This operation should happen only on '/' */ + if (!__is_root_gfid(loc->inode->gfid)) { + op_errno = ENOTSUP; + goto err; } -out: - if (local->fop == GF_FOP_REMOVEXATTR) { - DHT_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); - } else { - DHT_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + memcpy(value, tmp->data, min(tmp->len, 4095)); + local->key = gf_strdup(value); + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + /* Get the pathinfo, and then compare */ + STACK_WIND_COOKIE(frame, dht_checking_pathinfo_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->getxattr, loc, + GF_XATTR_PATHINFO_KEY, NULL); } return 0; + } -} - -int -dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, - int ret) -{ - dht_local_t *local = NULL; - int op_errno = EINVAL; + tmp = dict_get(xattr, GF_XATTR_FIX_LAYOUT_KEY); + if (tmp) { + ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash); + if (ret == 0) { + gf_msg_debug(this->name, 0, + "updating commit hash for %s from %u to %u", + uuid_utoa(loc->gfid), layout->commit_hash, new_hash); + layout->commit_hash = new_hash; - if (!frame || !frame->local) + ret = dht_update_commit_hash_for_layout(frame); + if (ret) { + op_errno = ENOTCONN; goto err; + } + return ret; + } - local = frame->local; - op_errno = local->op_errno; - - local->call_cnt = 2; /* This is the second attempt */ - - if (we_are_not_migrating (ret)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_FIX_LAYOUT_INFO, + "fixing the layout of %s", loc->path); - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. 
- */ - DHT_STACK_UNWIND (removexattr, frame, local->op_ret, - local->op_errno, local->rebalance.xdata); - return 0; + ret = dht_fix_directory_layout(frame, dht_fix_layout_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; } + return ret; + } - if (subvol == NULL) + tmp = dict_get(xattr, "distribute.directory-spread-count"); + if (tmp) { + /* Setxattr value is packed as 'binary', not string */ + memcpy(value, tmp->data, min(tmp->len, 4095)); + ret = gf_string2uint32(value, &dir_spread); + if (!ret && ((dir_spread <= conf->subvolume_cnt) && (dir_spread > 0))) { + layout->spread_cnt = dir_spread; + + ret = dht_fix_directory_layout(frame, dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; goto err; + } + return ret; + } + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_OPERATION_NOT_SUP, + "wrong 'directory-spread-count' value (%s)", value); + op_errno = ENOTSUP; + goto err; + } + + tmp = dict_get(xattr, "glusterfs.dht.nuke"); + if (tmp) { + return dht_nuke_dir(frame, this, loc, tmp); + } + local->xattr_req = xdata ? dict_ref(xdata) : dict_new(); + + if (IA_ISDIR(loc->inode->ia_type)) { + local->hashed_subvol = NULL; + ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, xattr, + flags, xdata, &op_errno); + if (ret) + goto err; + } else { + local->rebalance.xattr = dict_ref(xattr); + local->rebalance.flags = flags; + local->call_cnt = 1; - if (local->fop == GF_FOP_REMOVEXATTR) { - STACK_WIND_COOKIE (frame, dht_file_removexattr_cbk, subvol, - subvol, subvol->fops->removexattr, - &local->loc, local->key, local->xattr_req); - } else { - STACK_WIND_COOKIE (frame, dht_file_removexattr_cbk, subvol, - subvol, subvol->fops->fremovexattr, - local->fd, local->key, local->xattr_req); - } + ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); - return 0; + STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, + subvol->fops->setxattr, loc, xattr, flags, + local->xattr_req); + } + + return 0; err: - DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - return 0; -} + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + return 0; +} int -dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto unlock; - } + int ret = -1; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + struct iatt *stbuf = NULL; + inode_t *inode = NULL; + xlator_t *subvol1 = NULL, *subvol2 = NULL; - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); + local = frame->local; + prev = cookie; - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (removexattr, frame, local->op_ret, - local->op_errno, NULL); - } + local->op_errno = op_errno; + if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) && + (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; return 0; -} + } + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto out; + } -int -dht_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *key, dict_t *xdata) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int call_cnt = 0; - dht_conf_t *conf = NULL; - int ret = 0; + if (local->call_cnt != 1) + goto out; - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (this->private, err); + ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); - conf = this->private; + if ((!op_ret) && !stbuf) { + goto out; + } - GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); + local->op_ret = 0; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); + local->rebalance.target_op_fn = dht_removexattr2; + if (xdata) + local->rebalance.xdata = dict_ref(xdata); - local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR); - if (!local) { - op_errno = ENOMEM; - goto err; - } + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + /* Phase 1 of migration */ + if (IS_DHT_MIGRATION_PHASE1(stbuf)) { + inode = (local->fd) ? local->fd->inode : local->loc.inode; - layout = local->layout; - if (!local->layout) { - gf_msg_debug (this->name, 0, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; + ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2); + if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) { + dht_removexattr2(this, subvol2, frame, 0); + return 0; } - local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new (); - local->call_cnt = call_cnt = layout->cnt; - local->key = gf_strdup (key); + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } - if (key && - (strncmp (key, conf->mds_xattr_key, strlen(key)) == 0)) { - op_errno = ENOTSUP; - goto err; - } +out: + if (local->fop == GF_FOP_REMOVEXATTR) { + DHT_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); + } else { + DHT_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); + } + return 0; +} - if (IA_ISDIR (loc->inode->ia_type)) { - local->hashed_subvol = NULL; - ret = dht_dir_common_set_remove_xattr (frame, this, loc, NULL, - NULL, 0, local->xattr_req, &op_errno); - if (ret) - goto err; +int +dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +{ + dht_local_t *local = NULL; + int op_errno = EINVAL; - } else { + if (!frame || !frame->local) + goto err; - local->call_cnt = 1; - ret = dict_set_int8 (local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_DICT_SET_FAILED, "Failed to " - "set dictionary key %s for %s", - DHT_IATT_IN_XDATA_KEY, loc->path); - } + local = frame->local; + op_errno = local->op_errno; - STACK_WIND_COOKIE (frame, dht_file_removexattr_cbk, subvol, - subvol, subvol->fops->removexattr, loc, key, - local->xattr_req); - } + local->call_cnt = 2; /* This is the second attempt */ + + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + local->rebalance.xdata); + return 0; + } + + if (subvol == NULL) + goto err; + + if (local->fop == GF_FOP_REMOVEXATTR) { + STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, + subvol->fops->removexattr, &local->loc, local->key, + local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, + subvol->fops->fremovexattr, local->fd, local->key, + local->xattr_req); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - - return 0; + DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); + return 0; } int -dht_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *key, dict_t *xdata) +dht_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int call_cnt = 0; - dht_conf_t *conf = 0; - int ret = 0; - + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (this->private, err); + local = frame->local; + prev = cookie; - conf = this->private; - - GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); - - VALIDATE_OR_GOTO (frame, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR); - if (!local) { - op_errno = ENOMEM; - goto err; + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto unlock; } - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for inode=%s", - uuid_utoa (fd->inode->gfid)); - op_errno = EINVAL; - goto err; - } + local->op_ret = 0; + } +unlock: + UNLOCK(&frame->lock); + + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + NULL); + } + + return 0; +} + +int +dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *xdata) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int call_cnt = 0; + dht_conf_t *conf = NULL; + int ret = 0; + + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err); + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + + local = dht_local_init(frame, loc, NULL, GF_FOP_REMOVEXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + layout = local->layout; + if (!local->layout) { + gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + + local->call_cnt = call_cnt = layout->cnt; + local->key = gf_strdup(key); + + if (key && (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0)) { + op_errno = ENOTSUP; + goto err; + } + + if (IA_ISDIR(loc->inode->ia_type)) { + local->hashed_subvol = NULL; + ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, NULL, 0, + local->xattr_req, &op_errno); + if (ret) + goto err; - layout = local->layout; - if (!local->layout) { - gf_msg_debug (this->name, 0, - "no layout for inode=%s", - uuid_utoa (fd->inode->gfid)); - op_errno = EINVAL; - goto err; + } else { + local->call_cnt = 1; + ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to " + "set dictionary key %s for %s", + DHT_IATT_IN_XDATA_KEY, loc->path); } - local->xattr_req = xdata ? 
dict_ref (xdata) : dict_new (); - local->call_cnt = call_cnt = layout->cnt; - local->key = gf_strdup (key); + STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, + subvol->fops->removexattr, loc, key, + local->xattr_req); + } - if (IA_ISDIR (fd->inode->ia_type)) { - local->hashed_subvol = NULL; - ret = dht_dir_common_set_remove_xattr (frame, this, NULL, fd, - NULL, 0, local->xattr_req, &op_errno); - if (ret) - goto err; + return 0; - } else { +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); - local->call_cnt = 1; - ret = dict_set_int8 (local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_DICT_SET_FAILED, "Failed to " - "set dictionary key %s for fd=%p", - DHT_IATT_IN_XDATA_KEY, fd); - } + return 0; +} + +int +dht_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + dict_t *xdata) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int call_cnt = 0; + dht_conf_t *conf = 0; + int ret = 0; + + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err); + + VALIDATE_OR_GOTO(frame, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_FREMOVEXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for inode=%s", + uuid_utoa(fd->inode->gfid)); + op_errno = EINVAL; + goto err; + } + + layout = local->layout; + if (!local->layout) { + gf_msg_debug(this->name, 0, "no layout for inode=%s", + uuid_utoa(fd->inode->gfid)); + op_errno = EINVAL; + goto err; + } + local->xattr_req = xdata ? dict_ref(xdata) : dict_new(); + + local->call_cnt = call_cnt = layout->cnt; + local->key = gf_strdup(key); + + if (IA_ISDIR(fd->inode->ia_type)) { + local->hashed_subvol = NULL; + ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, NULL, 0, + local->xattr_req, &op_errno); + if (ret) + goto err; - STACK_WIND_COOKIE (frame, dht_file_removexattr_cbk, subvol, - subvol, subvol->fops->fremovexattr, fd, key, - local->xattr_req); + } else { + local->call_cnt = 1; + ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to " + "set dictionary key %s for fd=%p", + DHT_IATT_IN_XDATA_KEY, fd); } - return 0; + STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, + subvol->fops->fremovexattr, fd, key, + local->xattr_req); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL); - return 0; + return 0; } - int -dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +dht_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, fd_t *fd, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto unlock; - } + local = frame->local; + prev = cookie; - local->op_ret = 0; + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto unlock; } + + local->op_ret = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, - local->fd, NULL); + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) + DHT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd, + NULL); - return 0; + return 0; } /* * dht_normalize_stats - */ void -dht_normalize_stats (struct statvfs *buf, unsigned long bsize, - unsigned long frsize) +dht_normalize_stats(struct statvfs *buf, unsigned long bsize, + unsigned long frsize) { - double factor = 0; - - if (buf->f_bsize != bsize) { - buf->f_bsize = bsize; - } + double factor = 0; - if (buf->f_frsize != frsize) { - factor = ((double) buf->f_frsize) / frsize; - buf->f_frsize = frsize; - buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks); - buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree); - buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail); + if (buf->f_bsize != bsize) { + buf->f_bsize = bsize; + } - } + if (buf->f_frsize != frsize) { + factor = ((double)buf->f_frsize) / frsize; + buf->f_frsize = frsize; + buf->f_blocks = (fsblkcnt_t)(factor * buf->f_blocks); + buf->f_bfree = (fsblkcnt_t)(factor * buf->f_bfree); + buf->f_bavail = (fsblkcnt_t)(factor * buf->f_bavail); + } } int -dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct statvfs *statvfs, - dict_t *xdata) +dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct statvfs *statvfs, dict_t *xdata) { + gf_boolean_t event = _gf_false; + qdstatfs_action_t action = qdstatfs_action_OFF; + dht_local_t *local = NULL; + int this_call_cnt = 0; + int bsize = 0; + int frsize = 0; + GF_UNUSED int ret = 0; + unsigned long new_usage = 0; + unsigned long cur_usage = 0; - gf_boolean_t event = _gf_false; - qdstatfs_action_t action = qdstatfs_action_OFF; - dht_local_t * local = NULL; - int this_call_cnt = 0; - int bsize = 0; - int frsize = 0; - GF_UNUSED int ret = 0; - unsigned long new_usage = 0; - unsigned long cur_usage = 0; + local = frame->local; + GF_ASSERT(local); - local = frame->local; - GF_ASSERT (local); + if (xdata) + ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event); - if (xdata) - ret = dict_get_int8 (xdata, "quota-deem-statfs", - (int8_t *)&event); - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - goto unlock; - } - if (!statvfs) { - op_errno = EINVAL; - local->op_ret = -1; - goto 
unlock; - } - local->op_ret = 0; + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + if (!statvfs) { + op_errno = EINVAL; + local->op_ret = -1; + goto unlock; + } + local->op_ret = 0; - if (local->quota_deem_statfs) { - if (event == _gf_true) { - action = qdstatfs_action_COMPARE; - } else { - action = qdstatfs_action_NEGLECT; - } - } else { - if (event == _gf_true) { - action = qdstatfs_action_REPLACE; - local->quota_deem_statfs = _gf_true; - } - } + if (local->quota_deem_statfs) { + if (event == _gf_true) { + action = qdstatfs_action_COMPARE; + } else { + action = qdstatfs_action_NEGLECT; + } + } else { + if (event == _gf_true) { + action = qdstatfs_action_REPLACE; + local->quota_deem_statfs = _gf_true; + } + } - if (local->quota_deem_statfs) { - switch (action) { - case qdstatfs_action_NEGLECT: - goto unlock; - - case qdstatfs_action_REPLACE: - local->statvfs = *statvfs; - goto unlock; - - case qdstatfs_action_COMPARE: - new_usage = statvfs->f_blocks - - statvfs->f_bfree; - cur_usage = local->statvfs.f_blocks - - local->statvfs.f_bfree; - - /* Take the max of the usage from subvols */ - if (new_usage >= cur_usage) - local->statvfs = *statvfs; - goto unlock; - - default: - break; - } - } + if (local->quota_deem_statfs) { + switch (action) { + case qdstatfs_action_NEGLECT: + goto unlock; - if (local->statvfs.f_bsize != 0) { - bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); - frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); - dht_normalize_stats(&local->statvfs, bsize, frsize); - dht_normalize_stats(statvfs, bsize, frsize); - } else { - local->statvfs.f_bsize = statvfs->f_bsize; - local->statvfs.f_frsize = statvfs->f_frsize; - } + case qdstatfs_action_REPLACE: + local->statvfs = *statvfs; + goto unlock; - local->statvfs.f_blocks += statvfs->f_blocks; - local->statvfs.f_bfree += statvfs->f_bfree; - local->statvfs.f_bavail += statvfs->f_bavail; - local->statvfs.f_files += statvfs->f_files; - local->statvfs.f_ffree += statvfs->f_ffree; - local->statvfs.f_favail += statvfs->f_favail; - local->statvfs.f_fsid = statvfs->f_fsid; - local->statvfs.f_flag = statvfs->f_flag; - local->statvfs.f_namemax = statvfs->f_namemax; + case qdstatfs_action_COMPARE: + new_usage = statvfs->f_blocks - statvfs->f_bfree; + cur_usage = local->statvfs.f_blocks - + local->statvfs.f_bfree; + /* Take the max of the usage from subvols */ + if (new_usage >= cur_usage) + local->statvfs = *statvfs; + goto unlock; + default: + break; + } } -unlock: - UNLOCK (&frame->lock); + if (local->statvfs.f_bsize != 0) { + bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); + frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); + dht_normalize_stats(&local->statvfs, bsize, frsize); + dht_normalize_stats(statvfs, bsize, frsize); + } else { + local->statvfs.f_bsize = statvfs->f_bsize; + local->statvfs.f_frsize = statvfs->f_frsize; + } + + local->statvfs.f_blocks += statvfs->f_blocks; + local->statvfs.f_bfree += statvfs->f_bfree; + local->statvfs.f_bavail += statvfs->f_bavail; + local->statvfs.f_files += statvfs->f_files; + local->statvfs.f_ffree += statvfs->f_ffree; + local->statvfs.f_favail += statvfs->f_favail; + local->statvfs.f_fsid = statvfs->f_fsid; + local->statvfs.f_flag = statvfs->f_flag; + local->statvfs.f_namemax = statvfs->f_namemax; + } +unlock: + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno, - &local->statvfs, xdata); + this_call_cnt = 
dht_frame_return(frame); + if (is_last_call(this_call_cnt)) + DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, + &local->statvfs, xdata); - return 0; + return 0; } - int -dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - int i = -1; - inode_t *inode = NULL; - inode_table_t *itable = NULL; - uuid_t root_gfid = {0, }; - loc_t newloc = {0, }; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + inode_t *inode = NULL; + inode_table_t *itable = NULL; + uuid_t root_gfid = { + 0, + }; + loc_t newloc = { + 0, + }; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (this->private, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(this->private, err); - conf = this->private; - - local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS); - if (!local) { - op_errno = ENOMEM; - goto err; - } + conf = this->private; - if (loc->inode && !IA_ISDIR (loc->inode->ia_type)) { - itable = loc->inode->table; - if (!itable) { - op_errno = EINVAL; - goto err; - } + local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS); + if (!local) { + op_errno = ENOMEM; + goto err; + } - loc = &local->loc2; - root_gfid[15] = 1; + if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) { + itable = loc->inode->table; + if (!itable) { + op_errno = EINVAL; + goto err; + } - inode = inode_find (itable, root_gfid); - if (!inode) { - op_errno = EINVAL; - goto err; - } + loc = &local->loc2; + root_gfid[15] = 1; - dht_build_root_loc (inode, &newloc); - loc = &newloc; + inode = inode_find(itable, root_gfid); + if (!inode) { + op_errno = EINVAL; + goto err; } - local->call_cnt = conf->subvolume_cnt; + dht_build_root_loc(inode, &newloc); + loc = &newloc; + } - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_statfs_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->statfs, loc, - xdata); - } - return 0; + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND(frame, dht_statfs_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, loc, xdata); + } + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - int -dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, - dict_t *xdata) +dht_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - int i = -1; - int ret = 0; - gf_boolean_t new_xdata = _gf_false; - xlator_t **subvolumes = NULL; - int call_count = 0; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (this->private, err); - - conf = this->private; - - local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->first_up_subvol = dht_first_up_subvol (this); - + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + int ret = 0; + gf_boolean_t new_xdata = _gf_false; + xlator_t **subvolumes = NULL; + int call_count = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + local = dht_local_init(frame, loc, fd, GF_FOP_OPENDIR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->first_up_subvol = dht_first_up_subvol(this); + + if (!xdata) { + xdata = dict_new(); if (!xdata) { - xdata = dict_new (); - if (!xdata) { - op_errno = ENOMEM; - goto err; - } - new_xdata = _gf_true; + op_errno = ENOMEM; + goto err; + } + new_xdata = _gf_true; + } + + ret = dict_set_uint32(xdata, conf->link_xattr_name, 256); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value : key = %s", + conf->link_xattr_name); + + /* dht_readdirp will wind to all subvols so open has to be sent to + * all subvols whether or not conf->local_subvols is set */ + + call_count = local->call_cnt = conf->subvolume_cnt; + subvolumes = conf->subvolumes; + + /* In case of parallel-readdir, the readdir-ahead will be loaded + * below dht, in this case, if we want to enable or disable SKIP_DIRs + * it has to be done in opendir, so that prefetching logic in + * readdir-ahead, honors it */ + for (i = 0; i < call_count; i++) { + if (conf->readdir_optimize == _gf_true) { + if (subvolumes[i] != local->first_up_subvol) { + ret = dict_set_int32(xdata, GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary" + " value :key = %s, ret:%d", + GF_READDIR_SKIP_DIRS, ret); + } } - ret = dict_set_uint32 (xdata, conf->link_xattr_name, 256); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value : key = %s", - conf->link_xattr_name); - - /* dht_readdirp will wind to all subvols so open has to be sent to - * all subvols whether or not conf->local_subvols is set */ - - call_count = local->call_cnt = conf->subvolume_cnt; - subvolumes = conf->subvolumes; - - /* In case of parallel-readdir, the readdir-ahead will be loaded - * below dht, in this case, if we want to enable or disable SKIP_DIRs - * it has to be done in opendir, so that prefetching logic in - * readdir-ahead, honors it */ - for (i = 0; i < call_count; i++) { - if (conf->readdir_optimize == _gf_true) { - if (subvolumes[i] != local->first_up_subvol) { - ret = dict_set_int32 (xdata, - GF_READDIR_SKIP_DIRS, 1); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary" - 
" value :key = %s, ret:%d", - GF_READDIR_SKIP_DIRS, ret); - } - } - - STACK_WIND_COOKIE (frame, dht_fd_cbk, - subvolumes[i], - subvolumes[i], - subvolumes[i]->fops->opendir, - loc, fd, xdata); - dict_del (xdata, GF_READDIR_SKIP_DIRS); - } + STACK_WIND_COOKIE(frame, dht_fd_cbk, subvolumes[i], subvolumes[i], + subvolumes[i]->fops->opendir, loc, fd, xdata); + dict_del(xdata, GF_READDIR_SKIP_DIRS); + } - if (new_xdata) - dict_unref (xdata); + if (new_xdata) + dict_unref(xdata); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - /* dht_readdirp_cbk creates a new dentry and dentry->inode is not assigned. This functions assigns an inode if all of the following conditions are true: @@ -6937,3807 +6504,3589 @@ err: */ void -dht_populate_inode_for_dentry (xlator_t *this, xlator_t *subvol, - gf_dirent_t *entry, gf_dirent_t *orig_entry) +dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol, + gf_dirent_t *entry, gf_dirent_t *orig_entry) { - dht_layout_t *layout = NULL; - int ret = 0; - loc_t loc = {0, }; - - if (gf_uuid_is_null (orig_entry->d_stat.ia_gfid)) { - /* this skips the '..' entry for the root of the volume */ - return; - } + dht_layout_t *layout = NULL; + int ret = 0; + loc_t loc = { + 0, + }; - gf_uuid_copy (loc.gfid, orig_entry->d_stat.ia_gfid); - loc.inode = inode_ref (orig_entry->inode); + if (gf_uuid_is_null(orig_entry->d_stat.ia_gfid)) { + /* this skips the '..' entry for the root of the volume */ + return; + } - if (is_revalidate (&loc)) { - goto out; - } + gf_uuid_copy(loc.gfid, orig_entry->d_stat.ia_gfid); + loc.inode = inode_ref(orig_entry->inode); - layout = dht_layout_new (this, 1); - if (!layout) - goto out; + if (is_revalidate(&loc)) { + goto out; + } - ret = dht_layout_merge (this, layout, subvol, 0, 0, orig_entry->dict); - if (!ret) { - ret = dht_layout_normalize (this, &loc, layout); - if (ret == 0) { - dht_layout_set (this, orig_entry->inode, layout); - entry->inode = inode_ref (orig_entry->inode); - layout = NULL; - } + layout = dht_layout_new(this, 1); + if (!layout) + goto out; + ret = dht_layout_merge(this, layout, subvol, 0, 0, orig_entry->dict); + if (!ret) { + ret = dht_layout_normalize(this, &loc, layout); + if (ret == 0) { + dht_layout_set(this, orig_entry->inode, layout); + entry->inode = inode_ref(orig_entry->inode); + layout = NULL; } + } - if (layout) - dht_layout_unref (this, layout); + if (layout) + dht_layout_unref(this, layout); out: - loc_wipe (&loc); - return; + loc_wipe(&loc); + return; } - /* Posix returns op_errno = ENOENT to indicate that there are no more entries */ int -dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) -{ - dht_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - xlator_t *prev = NULL; - xlator_t *next_subvol = NULL; - off_t next_offset = 0; - int count = 0; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; - xlator_t *subvol = 0; - xlator_t *hashed_subvol = 0; - int ret = 0; - int readdir_optimize = 0; - inode_table_t *itable = NULL; - inode_t *inode = NULL; - gf_boolean_t skip_hashed_check = _gf_false; - - INIT_LIST_HEAD (&entries.list); - - prev = cookie; - local = frame->local; - itable = local->fd ? 
local->fd->inode->table : NULL; - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, conf, unwind); - - methods = &(conf->methods); +dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) +{ + dht_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + xlator_t *prev = NULL; + xlator_t *next_subvol = NULL; + off_t next_offset = 0; + int count = 0; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; + xlator_t *subvol = 0; + xlator_t *hashed_subvol = 0; + int ret = 0; + int readdir_optimize = 0; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + gf_boolean_t skip_hashed_check = _gf_false; + + INIT_LIST_HEAD(&entries.list); + + prev = cookie; + local = frame->local; + itable = local->fd ? local->fd->inode->table : NULL; + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, unwind); + + methods = &(conf->methods); + + if (op_ret <= 0) { + goto done; + } + + /* Why aren't we skipping DHT entirely in case of a single subvol? + * Because if this was a larger volume earlier and all but one subvol + * was removed, there might be stale linkto files on the subvol. + */ + if (conf->subvolume_cnt == 1) { + /* return all directory and file entries except + * linkto files for a single child DHT + */ + skip_hashed_check = _gf_true; + } - if (op_ret <= 0) { - goto done; - } + if (!local->layout) + local->layout = dht_layout_get(this, local->fd->inode); - /* Why aren't we skipping DHT entirely in case of a single subvol? - * Because if this was a larger volume earlier and all but one subvol - * was removed, there might be stale linkto files on the subvol. - */ - if (conf->subvolume_cnt == 1) { - /* return all directory and file entries except - * linkto files for a single child DHT - */ - skip_hashed_check = _gf_true; - } + layout = local->layout; - if (!local->layout) - local->layout = dht_layout_get (this, local->fd->inode); + /* We have seen crashes in while running "rm -rf" on tier volumes + when the layout was NULL on the hot tier. This will skip the + entries on the subvol without a layout, hence preventing the crash + but rmdir might fail with "directory not empty" errors*/ - layout = local->layout; + if (layout == NULL) + goto done; - /* We have seen crashes in while running "rm -rf" on tier volumes - when the layout was NULL on the hot tier. 
This will skip the - entries on the subvol without a layout, hence preventing the crash - but rmdir might fail with "directory not empty" errors*/ + if (conf->readdir_optimize == _gf_true) + readdir_optimize = 1; - if (layout == NULL) - goto done; + gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name); - if (conf->readdir_optimize == _gf_true) - readdir_optimize = 1; + list_for_each_entry(orig_entry, (&orig_entries->list), list) + { + next_offset = orig_entry->d_off; - gf_msg_debug (this->name, 0, "Processing entries from %s", - prev->name); + gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name, + orig_entry->d_name, orig_entry->d_type); - list_for_each_entry (orig_entry, (&orig_entries->list), list) { - next_offset = orig_entry->d_off; + if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { + /*stat failed somewhere- ignore this entry*/ + gf_msg_debug(this->name, EINVAL, + "Invalid stat, ignoring entry " + "%s gfid %s", + orig_entry->d_name, + uuid_utoa(orig_entry->d_stat.ia_gfid)); + continue; + } - gf_msg_debug (this->name, 0, "%s: entry = %s, type = %d", - prev->name, orig_entry->d_name, - orig_entry->d_type); + if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict, + conf->link_xattr_name)) { + gf_msg_debug(this->name, 0, "%s: %s is a linkto file", prev->name, + orig_entry->d_name); + continue; + } - if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { - /*stat failed somewhere- ignore this entry*/ - gf_msg_debug (this->name, EINVAL, - "Invalid stat, ignoring entry " - "%s gfid %s", orig_entry->d_name, - uuid_utoa (orig_entry->d_stat.ia_gfid)); - continue; - } + if (skip_hashed_check) { + goto list; + } - if (check_is_linkfile (NULL, (&orig_entry->d_stat), - orig_entry->dict, - conf->link_xattr_name)) { - gf_msg_debug (this->name, 0, "%s: %s is a linkto file", - prev->name, orig_entry->d_name); - continue; - } + if (check_is_dir(NULL, (&orig_entry->d_stat), NULL)) { + /*Directory entries filtering : + * a) If rebalance is running, pick from first_up_subvol + * b) (rebalance not running)hashed subvolume is NULL or + * down then filter in first_up_subvolume. Other wise the + * corresponding hashed subvolume will take care of the + * directory entry. + */ + if (readdir_optimize) { + if (prev == local->first_up_subvol) + goto list; + else + continue; + } - if (skip_hashed_check) { - goto list; - } + hashed_subvol = methods->layout_search(this, layout, + orig_entry->d_name); - if (check_is_dir (NULL, (&orig_entry->d_stat), NULL)) { + if (prev == hashed_subvol) + goto list; + if ((hashed_subvol && dht_subvol_status(conf, hashed_subvol)) || + (prev != local->first_up_subvol)) + continue; - /*Directory entries filtering : - * a) If rebalance is running, pick from first_up_subvol - * b) (rebalance not running)hashed subvolume is NULL or - * down then filter in first_up_subvolume. Other wise the - * corresponding hashed subvolume will take care of the - * directory entry. 
- */ - if (readdir_optimize) { - if (prev == local->first_up_subvol) - goto list; - else - continue; + goto list; + } - } + list: + entry = gf_dirent_for_name(orig_entry->d_name); + if (!entry) { + goto unwind; + } - hashed_subvol = methods->layout_search (this, layout, - orig_entry->d_name); + /* Do this if conf->search_unhashed is set to "auto" */ + if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) { + subvol = methods->layout_search(this, layout, orig_entry->d_name); + if (!subvol || (subvol != prev)) { + /* TODO: Count the number of entries which need + linkfile to prove its existence in fs */ + layout->search_unhashed++; + } + } - if (prev == hashed_subvol) - goto list; - if ((hashed_subvol - && dht_subvol_status (conf, hashed_subvol)) - || (prev != local->first_up_subvol)) - continue; + entry->d_off = orig_entry->d_off; + entry->d_stat = orig_entry->d_stat; + entry->d_ino = orig_entry->d_ino; + entry->d_type = orig_entry->d_type; + entry->d_len = orig_entry->d_len; - goto list; - } + if (orig_entry->dict) + entry->dict = dict_ref(orig_entry->dict); -list: - entry = gf_dirent_for_name (orig_entry->d_name); - if (!entry) { - goto unwind; - } + /* making sure we set the inode ctx right with layout, + currently possible only for non-directories, so for + directories don't set entry inodes */ + if (IA_ISDIR(entry->d_stat.ia_type)) { + entry->d_stat.ia_blocks = DHT_DIR_STAT_BLOCKS; + entry->d_stat.ia_size = DHT_DIR_STAT_SIZE; + if (orig_entry->inode) { + dht_inode_ctx_time_update(orig_entry->inode, this, + &entry->d_stat, 1); - /* Do this if conf->search_unhashed is set to "auto" */ - if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) { - subvol = methods->layout_search (this, layout, - orig_entry->d_name); - if (!subvol || (subvol != prev)) { - /* TODO: Count the number of entries which need - linkfile to prove its existence in fs */ - layout->search_unhashed++; - } + if (conf->subvolume_cnt == 1) { + dht_populate_inode_for_dentry(this, prev, entry, + orig_entry); } - - entry->d_off = orig_entry->d_off; - entry->d_stat = orig_entry->d_stat; - entry->d_ino = orig_entry->d_ino; - entry->d_type = orig_entry->d_type; - entry->d_len = orig_entry->d_len; - - if (orig_entry->dict) - entry->dict = dict_ref (orig_entry->dict); - - /* making sure we set the inode ctx right with layout, - currently possible only for non-directories, so for - directories don't set entry inodes */ - if (IA_ISDIR(entry->d_stat.ia_type)) { - entry->d_stat.ia_blocks = DHT_DIR_STAT_BLOCKS; - entry->d_stat.ia_size = DHT_DIR_STAT_SIZE; - if (orig_entry->inode) { - dht_inode_ctx_time_update (orig_entry->inode, - this, &entry->d_stat, - 1); - - if (conf->subvolume_cnt == 1) { - dht_populate_inode_for_dentry (this, - prev, - entry, - orig_entry); - } - - } - } else { - if (orig_entry->inode) { - ret = dht_layout_preset (this, prev, - orig_entry->inode); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to link the layout " - "in inode for %s", - orig_entry->d_name); - - entry->inode = inode_ref (orig_entry->inode); - } else if (itable) { - /* - * orig_entry->inode might be null if any upper - * layer xlators below client set to null, to - * force a lookup on the inode even if the inode - * is present in the inode table. In that case - * we just update the ctx to make sure we didn't - * missed anything. 
- */ - inode = inode_find (itable, - orig_entry->d_stat.ia_gfid); - if (inode) { - ret = dht_layout_preset - (this, prev, - inode); - if (ret) - gf_msg (this->name, - GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to link the layout" - " in inode for %s", - orig_entry->d_name); - inode_unref (inode); - inode = NULL; - } - } + } + } else { + if (orig_entry->inode) { + ret = dht_layout_preset(this, prev, orig_entry->inode); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_LAYOUT_SET_FAILED, + "failed to link the layout " + "in inode for %s", + orig_entry->d_name); + + entry->inode = inode_ref(orig_entry->inode); + } else if (itable) { + /* + * orig_entry->inode might be null if any upper + * layer xlators below client set to null, to + * force a lookup on the inode even if the inode + * is present in the inode table. In that case + * we just update the ctx to make sure we didn't + * missed anything. + */ + inode = inode_find(itable, orig_entry->d_stat.ia_gfid); + if (inode) { + ret = dht_layout_preset(this, prev, inode); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_LAYOUT_SET_FAILED, + "failed to link the layout" + " in inode for %s", + orig_entry->d_name); + inode_unref(inode); + inode = NULL; } + } + } - gf_msg_debug (this->name, 0, "%s: Adding entry = %s", - prev->name, entry->d_name); + gf_msg_debug(this->name, 0, "%s: Adding entry = %s", prev->name, + entry->d_name); - list_add_tail (&entry->list, &entries.list); - count++; - } + list_add_tail(&entry->list, &entries.list); + count++; + } done: - /* We need to ensure that only the last subvolume's end-of-directory - * notification is respected so that directory reading does not stop - * before all subvolumes have been read. That could happen because the - * posix for each subvolume sends a ENOENT on end-of-directory but in - * distribute we're not concerned only with a posix's view of the - * directory but the aggregated namespace' view of the directory. - * Possible values: - * op_ret == 0 and op_errno != 0 - * if op_errno != ENOENT : Error.Unwind. - * if op_errno == ENOENT : There are no more entries on this subvol. - * Move to the next one. - * op_ret > 0 and count == 0 : - * The subvol returned entries to dht but all were stripped out. - * For example, if they were linkto files or dirs where - * hashed_subvol != prev. Try to get some entries by winding - * to the next subvol. This can be dangerous if parallel readdir - * is enabled as it grows the stack. - * - * op_ret > 0 and count > 0: - * We found some entries. Unwind even if the buffer is not full. - * + /* We need to ensure that only the last subvolume's end-of-directory + * notification is respected so that directory reading does not stop + * before all subvolumes have been read. That could happen because the + * posix for each subvolume sends a ENOENT on end-of-directory but in + * distribute we're not concerned only with a posix's view of the + * directory but the aggregated namespace' view of the directory. + * Possible values: + * op_ret == 0 and op_errno != 0 + * if op_errno != ENOENT : Error.Unwind. + * if op_errno == ENOENT : There are no more entries on this subvol. + * Move to the next one. + * op_ret > 0 and count == 0 : + * The subvol returned entries to dht but all were stripped out. + * For example, if they were linkto files or dirs where + * hashed_subvol != prev. Try to get some entries by winding + * to the next subvol. This can be dangerous if parallel readdir + * is enabled as it grows the stack. 
+ * + * op_ret > 0 and count > 0: + * We found some entries. Unwind even if the buffer is not full. + * + */ + + op_ret = count; + if (count == 0) { + /* non-zero next_offset means that + * EOF is not yet hit on the current subvol */ + if ((next_offset == 0) || (op_errno == ENOENT)) { + next_offset = 0; + next_subvol = dht_subvol_next(this, prev); + } else { + next_subvol = prev; + } - op_ret = count; - if (count == 0) { - /* non-zero next_offset means that - * EOF is not yet hit on the current subvol - */ - if ((next_offset == 0) || (op_errno == ENOENT)) { - next_offset = 0; - next_subvol = dht_subvol_next (this, prev); - } else { - next_subvol = prev; - } - - if (!next_subvol) { - goto unwind; - } - - if (conf->readdir_optimize == _gf_true) { - if (next_subvol != local->first_up_subvol) { - ret = dict_set_int32 (local->xattr, - GF_READDIR_SKIP_DIRS, 1); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value" - ":key = %s", - GF_READDIR_SKIP_DIRS ); - } else { - dict_del (local->xattr, - GF_READDIR_SKIP_DIRS); - } - } + if (!next_subvol) { + goto unwind; + } - STACK_WIND_COOKIE (frame, dht_readdirp_cbk, next_subvol, - next_subvol, next_subvol->fops->readdirp, - local->fd, local->size, - next_offset, local->xattr); - return 0; + if (conf->readdir_optimize == _gf_true) { + if (next_subvol != local->first_up_subvol) { + ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value" + ":key = %s", + GF_READDIR_SKIP_DIRS); + } else { + dict_del(local->xattr, GF_READDIR_SKIP_DIRS); + } } + STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol, + next_subvol->fops->readdirp, local->fd, local->size, + next_offset, local->xattr); + return 0; + } + unwind: - /* We need to ensure that only the last subvolume's end-of-directory - * notification is respected so that directory reading does not stop - * before all subvolumes have been read. That could happen because the - * posix for each subvolume sends a ENOENT on end-of-directory but in - * distribute we're not concerned only with a posix's view of the - * directory but the aggregated namespace' view of the directory. - */ - if (op_ret < 0) - op_ret = 0; + /* We need to ensure that only the last subvolume's end-of-directory + * notification is respected so that directory reading does not stop + * before all subvolumes have been read. That could happen because the + * posix for each subvolume sends a ENOENT on end-of-directory but in + * distribute we're not concerned only with a posix's view of the + * directory but the aggregated namespace' view of the directory. 
+ */ + if (op_ret < 0) + op_ret = 0; - if (prev != dht_last_up_subvol (this)) - op_errno = 0; + if (prev != dht_last_up_subvol(this)) + op_errno = 0; - DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, - &entries, NULL); + DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); - return 0; + gf_dirent_free(&entries); + return 0; } - int -dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *orig_entries, - dict_t *xdata) +dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { - dht_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - xlator_t *prev = NULL; - xlator_t *next_subvol = NULL; - off_t next_offset = 0; - int count = 0; - dht_layout_t *layout = 0; - xlator_t *subvol = 0; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; - gf_boolean_t skip_hashed_check = _gf_false; - - INIT_LIST_HEAD (&entries.list); - - prev = cookie; - local = frame->local; + dht_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + xlator_t *prev = NULL; + xlator_t *next_subvol = NULL; + off_t next_offset = 0; + int count = 0; + dht_layout_t *layout = 0; + xlator_t *subvol = 0; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; + gf_boolean_t skip_hashed_check = _gf_false; - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, done); + INIT_LIST_HEAD(&entries.list); - methods = &(conf->methods); + prev = cookie; + local = frame->local; - if (op_ret <= 0) - goto done; + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, done); - if (!local->layout) - local->layout = dht_layout_get (this, local->fd->inode); + methods = &(conf->methods); - layout = local->layout; + if (op_ret <= 0) + goto done; - gf_msg_debug (this->name, 0, "Processing entries from %s", - prev->name); + if (!local->layout) + local->layout = dht_layout_get(this, local->fd->inode); - if (conf->subvolume_cnt == 1) { - /*return everything*/ - skip_hashed_check = _gf_true; - count = op_ret; - goto done; - } + layout = local->layout; - list_for_each_entry (orig_entry, (&orig_entries->list), list) { - next_offset = orig_entry->d_off; + gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name); - gf_msg_debug (this->name, 0, "%s: entry = %s, type = %d", - prev->name, orig_entry->d_name, - orig_entry->d_type); + if (conf->subvolume_cnt == 1) { + /*return everything*/ + skip_hashed_check = _gf_true; + count = op_ret; + goto done; + } - subvol = methods->layout_search (this, layout, - orig_entry->d_name); + list_for_each_entry(orig_entry, (&orig_entries->list), list) + { + next_offset = orig_entry->d_off; - if (!subvol || (subvol == prev)) { - entry = gf_dirent_for_name (orig_entry->d_name); - if (!entry) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - "Memory allocation failed "); - goto unwind; - } + gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name, + orig_entry->d_name, orig_entry->d_type); - entry->d_off = orig_entry->d_off; - entry->d_ino = orig_entry->d_ino; - entry->d_type = orig_entry->d_type; - entry->d_len = orig_entry->d_len; + subvol = methods->layout_search(this, layout, orig_entry->d_name); - gf_msg_debug (this->name, 0, "%s: Adding = entry %s", - prev->name, entry->d_name); + if (!subvol || (subvol == prev)) { + entry = gf_dirent_for_name(orig_entry->d_name); 
+ if (!entry) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "Memory allocation failed "); + goto unwind; + } - list_add_tail (&entry->list, &entries.list); - count++; - } + entry->d_off = orig_entry->d_off; + entry->d_ino = orig_entry->d_ino; + entry->d_type = orig_entry->d_type; + entry->d_len = orig_entry->d_len; + + gf_msg_debug(this->name, 0, "%s: Adding = entry %s", prev->name, + entry->d_name); + + list_add_tail(&entry->list, &entries.list); + count++; } + } done: - op_ret = count; - /* We need to ensure that only the last subvolume's end-of-directory - * notification is respected so that directory reading does not stop - * before all subvolumes have been read. That could happen because the - * posix for each subvolume sends a ENOENT on end-of-directory but in - * distribute we're not concerned only with a posix's view of the - * directory but the aggregated namespace' view of the directory. - */ - if (count == 0) { - if ((next_offset == 0) || (op_errno == ENOENT)) { - next_offset = 0; - next_subvol = dht_subvol_next (this, prev); - } else { - next_subvol = prev; - } - - if (!next_subvol) { - goto unwind; - } + op_ret = count; + /* We need to ensure that only the last subvolume's end-of-directory + * notification is respected so that directory reading does not stop + * before all subvolumes have been read. That could happen because the + * posix for each subvolume sends a ENOENT on end-of-directory but in + * distribute we're not concerned only with a posix's view of the + * directory but the aggregated namespace' view of the directory. + */ + if (count == 0) { + if ((next_offset == 0) || (op_errno == ENOENT)) { + next_offset = 0; + next_subvol = dht_subvol_next(this, prev); + } else { + next_subvol = prev; + } - STACK_WIND_COOKIE (frame, dht_readdir_cbk, next_subvol, - next_subvol, next_subvol->fops->readdir, - local->fd, local->size, - next_offset, NULL); - return 0; + if (!next_subvol) { + goto unwind; } + STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol, + next_subvol->fops->readdir, local->fd, local->size, + next_offset, NULL); + return 0; + } + unwind: - /* We need to ensure that only the last subvolume's end-of-directory - * notification is respected so that directory reading does not stop - * before all subvolumes have been read. That could happen because the - * posix for each subvolume sends a ENOENT on end-of-directory but in - * distribute we're not concerned only with a posix's view of the - * directory but the aggregated namespace' view of the directory. - */ + /* We need to ensure that only the last subvolume's end-of-directory + * notification is respected so that directory reading does not stop + * before all subvolumes have been read. That could happen because the + * posix for each subvolume sends a ENOENT on end-of-directory but in + * distribute we're not concerned only with a posix's view of the + * directory but the aggregated namespace' view of the directory. 
+ */ - if (prev != dht_last_up_subvol (this)) - op_errno = 0; + if (prev != dht_last_up_subvol(this)) + op_errno = 0; - if (!skip_hashed_check) { - DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, - &entries, NULL); - gf_dirent_free (&entries); + if (!skip_hashed_check) { + DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); + gf_dirent_free(&entries); - } else { - DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, - orig_entries, NULL); - } - return 0; + } else { + DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, orig_entries, NULL); + } + return 0; } - int -dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, int whichop, dict_t *dict) +dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t yoff, int whichop, dict_t *dict) { - dht_local_t *local = NULL; - int op_errno = -1; - xlator_t *xvol = NULL; - int ret = 0; - dht_conf_t *conf = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (this->private, err); - - conf = this->private; - - local = dht_local_init (frame, NULL, NULL, whichop); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->fd = fd_ref (fd); - local->size = size; - local->xattr_req = (dict)? dict_ref (dict) : NULL; - local->first_up_subvol = dht_first_up_subvol (this); - local->op_ret = -1; + dht_local_t *local = NULL; + int op_errno = -1; + xlator_t *xvol = NULL; + int ret = 0; + dht_conf_t *conf = NULL; - dht_deitransform (this, yoff, &xvol); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(this->private, err); - /* TODO: do proper readdir */ - if (whichop == GF_FOP_READDIRP) { - if (dict) - local->xattr = dict_ref (dict); - else - local->xattr = dict_new (); + conf = this->private; - if (local->xattr) { - ret = dict_set_uint32 (local->xattr, - conf->link_xattr_name, 256); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value" - " : key = %s", - conf->link_xattr_name); + local = dht_local_init(frame, NULL, NULL, whichop); + if (!local) { + op_errno = ENOMEM; + goto err; + } - if (conf->readdir_optimize == _gf_true) { - if (xvol != local->first_up_subvol) { - ret = dict_set_int32 (local->xattr, - GF_READDIR_SKIP_DIRS, 1); - if (ret) - gf_msg (this->name, - GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set " - "dictionary value: " - "key = %s", - GF_READDIR_SKIP_DIRS); - } else { - dict_del (local->xattr, - GF_READDIR_SKIP_DIRS); - } - } - - if (conf->subvolume_cnt == 1) { - ret = dict_set_uint32 (local->xattr, - conf->xattr_name, 4 * 4); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, - ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary " - "value:key = %s ", - conf->xattr_name); - } - } + local->fd = fd_ref(fd); + local->size = size; + local->xattr_req = (dict) ? 
dict_ref(dict) : NULL; + local->first_up_subvol = dht_first_up_subvol(this); + local->op_ret = -1; + dht_deitransform(this, yoff, &xvol); + /* TODO: do proper readdir */ + if (whichop == GF_FOP_READDIRP) { + if (dict) + local->xattr = dict_ref(dict); + else + local->xattr = dict_new(); + + if (local->xattr) { + ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value" + " : key = %s", + conf->link_xattr_name); + + if (conf->readdir_optimize == _gf_true) { + if (xvol != local->first_up_subvol) { + ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_DICT_SET_FAILED, + "Failed to set " + "dictionary value: " + "key = %s", + GF_READDIR_SKIP_DIRS); + } else { + dict_del(local->xattr, GF_READDIR_SKIP_DIRS); } + } - STACK_WIND_COOKIE (frame, dht_readdirp_cbk, xvol, xvol, - xvol->fops->readdirp, fd, size, yoff, - local->xattr); - } else { - STACK_WIND_COOKIE (frame, dht_readdir_cbk, xvol, xvol, - xvol->fops->readdir, fd, size, yoff, - local->xattr); + if (conf->subvolume_cnt == 1) { + ret = dict_set_uint32(local->xattr, conf->xattr_name, 4 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary " + "value:key = %s ", + conf->xattr_name); + } + } } - return 0; + STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol, + xvol->fops->readdirp, fd, size, yoff, local->xattr); + } else { + STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol, + xvol->fops->readdir, fd, size, yoff, local->xattr); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - int -dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, dict_t *xdata) +dht_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t yoff, dict_t *xdata) { - int op = GF_FOP_READDIR; - dht_conf_t *conf = NULL; - int i = 0; + int op = GF_FOP_READDIR; + dht_conf_t *conf = NULL; + int i = 0; - conf = this->private; - if (!conf) - goto out; + conf = this->private; + if (!conf) + goto out; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->subvolume_status[i]) { - op = GF_FOP_READDIRP; - break; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) { + op = GF_FOP_READDIRP; + break; } + } - if (conf->use_readdirp) - op = GF_FOP_READDIRP; + if (conf->use_readdirp) + op = GF_FOP_READDIRP; out: - dht_do_readdir (frame, this, fd, size, yoff, op, 0); - return 0; + dht_do_readdir(frame, this, fd, size, yoff, op, 0); + return 0; } int -dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, dict_t *dict) +dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t yoff, dict_t *dict) { - dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); - return 0; + dht_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); + return 0; } - - int -dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + local = frame->local; - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == -1) - local->op_errno = op_errno; + LOCK(&frame->lock); + { + if (op_ret == -1) + local->op_errno = op_errno; - if (op_ret == 0) - local->op_ret = 0; - } - UNLOCK (&frame->lock); + if (op_ret == 0) + local->op_ret = 0; + } + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret, - local->op_errno, xdata); + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) + DHT_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno, + xdata); - return 0; + return 0; } - int -dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, - int datasync, dict_t *xdata) +dht_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - int i = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (this->private, err); - - conf = this->private; - - local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->fd = fd_ref (fd); - local->call_cnt = conf->subvolume_cnt; - - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (frame, dht_fsyncdir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->fsyncdir, - fd, datasync, xdata); - } - - return 0; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL); - - return 0; -} + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(this->private, err); + conf = this->private; -int -dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - xlator_t *prev = NULL; - int ret = -1; - dht_local_t *local = NULL; - + local = dht_local_init(frame, NULL, NULL, GF_FOP_FSYNCDIR); + if (!local) { + op_errno = ENOMEM; + goto err; + } - if (op_ret == -1) - goto out; + local->fd = fd_ref(fd); + local->call_cnt = conf->subvolume_cnt; - local = frame->local; - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND(frame, dht_fsyncdir_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->fsyncdir, fd, datasync, xdata); + } - prev = cookie; + return 0; - if (local->loc.parent) { +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL); - dht_inode_ctx_time_update (local->loc.parent, this, - preparent, 0); - dht_inode_ctx_time_update (local->loc.parent, this, - postparent, 1); - } + return 0; +} - ret = dht_layout_preset (this, prev, inode); - if (ret < 0) { - gf_msg_debug (this->name, EINVAL, - "could not set pre-set layout for subvolume %s", - prev? prev->name: NULL); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - if (local->linked == _gf_true) - dht_linkfile_attr_heal (frame, this); -out: - /* - * FIXME: ia_size and st_blocks of preparent and postparent do not have - * correct values. since, preparent and postparent buffers correspond - * to a directory these two members should have values equal to sum of - * corresponding values from each of the subvolume. - * See dht_iatt_merge for reference. - */ - DHT_STRIP_PHASE1_FLAGS (stbuf); - dht_set_fixed_dir_stat (postparent); - dht_set_fixed_dir_stat (preparent); +int +dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + xlator_t *prev = NULL; + int ret = -1; + dht_local_t *local = NULL; - if (local && local->lock[0].layout.parent_layout.locks) { - /* store op_errno for failure case*/ - local->op_errno = op_errno; - local->refresh_layout_unlock (frame, this, op_ret, 1); + if (op_ret == -1) + goto out; - if (op_ret == 0) { - DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, - inode, stbuf, preparent, postparent, - xdata); - } - } else { - DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, - stbuf, preparent, postparent, xdata); + local = frame->local; + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + prev = cookie; + + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); + dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); + } + + ret = dht_layout_preset(this, prev, inode); + if (ret < 0) { + gf_msg_debug(this->name, EINVAL, + "could not set pre-set layout for subvolume %s", + prev ? prev->name : NULL); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + if (local->linked == _gf_true) + dht_linkfile_attr_heal(frame, this); +out: + /* + * FIXME: ia_size and st_blocks of preparent and postparent do not have + * correct values. 
since, preparent and postparent buffers correspond + * to a directory these two members should have values equal to sum of + * corresponding values from each of the subvolume. + * See dht_iatt_merge for reference. + */ + DHT_STRIP_PHASE1_FLAGS(stbuf); + dht_set_fixed_dir_stat(postparent); + dht_set_fixed_dir_stat(preparent); + + if (local && local->lock[0].layout.parent_layout.locks) { + /* store op_errno for failure case*/ + local->op_errno = op_errno; + local->refresh_layout_unlock(frame, this, op_ret, 1); + + if (op_ret == 0) { + DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf, + preparent, postparent, xdata); } + } else { + DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf, + preparent, postparent, xdata); + } - return 0; + return 0; } int -dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; - local = frame->local; + local = frame->local; - if (!local || !local->cached_subvol) { - op_errno = EINVAL; - goto err; - } + if (!local || !local->cached_subvol) { + op_errno = EINVAL; + goto err; + } - if (op_ret == -1) { - local->op_errno = op_errno; - goto err; - } + if (op_ret == -1) { + local->op_errno = op_errno; + goto err; + } - conf = this->private; - if (!conf) { - local->op_errno = EINVAL; - op_errno = EINVAL; - goto err; - } + conf = this->private; + if (!conf) { + local->op_errno = EINVAL; + op_errno = EINVAL; + goto err; + } - cached_subvol = local->cached_subvol; + cached_subvol = local->cached_subvol; - if (local->params) { - dict_del (local->params, conf->link_xattr_name); - dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY); - } + if (local->params) { + dict_del(local->params, conf->link_xattr_name); + dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); + } - STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol, - cached_subvol, cached_subvol->fops->mknod, - &local->loc, local->mode, local->rdev, local->umask, - local->params); + STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)cached_subvol, + cached_subvol, cached_subvol->fops->mknod, &local->loc, + local->mode, local->rdev, local->umask, local->params); - return 0; + return 0; err: - if (local && local->lock[0].layout.parent_layout.locks) { - local->refresh_layout_unlock (frame, this, -1, 1); - } else { - DHT_STACK_UNWIND (mknod, frame, -1, - op_errno, NULL, NULL, NULL, - NULL, NULL); - } - return 0; + if (local && local->lock[0].layout.parent_layout.locks) { + local->refresh_layout_unlock(frame, this, -1, 1); + } else { + DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + } + return 0; } int -dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, - xlator_t *subvol, loc_t *loc, dev_t rdev, - mode_t mode, mode_t umask, dict_t *params) +dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc, dev_t rdev, + mode_t mode, mode_t umask, dict_t *params) { - dht_local_t *local = NULL; - xlator_t *avail_subvol = NULL; - - local = 
frame->local; + dht_local_t *local = NULL; + xlator_t *avail_subvol = NULL; - if (!dht_is_subvol_filled (this, subvol)) { - gf_msg_debug (this->name, 0, - "creating %s on %s", loc->path, - subvol->name); + local = frame->local; - STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, - subvol, subvol->fops->mknod, loc, mode, - rdev, umask, params); - } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol, local); + if (!dht_is_subvol_filled(this, subvol)) { + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); - if (avail_subvol != subvol) { - local->params = dict_ref (params); - local->rdev = rdev; - local->mode = mode; - local->umask = umask; - local->cached_subvol = avail_subvol; - local->hashed_subvol = subvol; + STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol, + subvol->fops->mknod, loc, mode, rdev, umask, params); + } else { + avail_subvol = dht_free_disk_available_subvol(this, subvol, local); - gf_msg_debug (this->name, 0, - "creating %s on %s (link at %s)", loc->path, - avail_subvol->name, subvol->name); + if (avail_subvol != subvol) { + local->params = dict_ref(params); + local->rdev = rdev; + local->mode = mode; + local->umask = umask; + local->cached_subvol = avail_subvol; + local->hashed_subvol = subvol; - dht_linkfile_create (frame, - dht_mknod_linkfile_create_cbk, - this, avail_subvol, subvol, loc); + gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", + loc->path, avail_subvol->name, subvol->name); - goto out; - } + dht_linkfile_create(frame, dht_mknod_linkfile_create_cbk, this, + avail_subvol, subvol, loc); - gf_msg_debug (this->name, 0, - "creating %s on %s", loc->path, subvol->name); + goto out; + } - STACK_WIND_COOKIE (frame, dht_newfile_cbk, - (void *)subvol, subvol, - subvol->fops->mknod, loc, mode, - rdev, umask, params); + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); - } + STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol, + subvol->fops->mknod, loc, mode, rdev, umask, params); + } out: - return 0; + return 0; } int32_t -dht_mknod_do (call_frame_t *frame) +dht_mknod_do(call_frame_t *frame) { - dht_local_t *local = NULL; - dht_layout_t *refreshed = NULL; - xlator_t *subvol = NULL; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; + dht_local_t *local = NULL; + dht_layout_t *refreshed = NULL; + xlator_t *subvol = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; - local = frame->local; + local = frame->local; - this = THIS; + this = THIS; - conf = this->private; + conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, err); + GF_VALIDATE_OR_GOTO(this->name, conf, err); - methods = &(conf->methods); + methods = &(conf->methods); - /* We don't need parent_loc anymore */ - loc_wipe (&local->loc); + /* We don't need parent_loc anymore */ + loc_wipe(&local->loc); - loc_copy (&local->loc, &local->loc2); + loc_copy(&local->loc, &local->loc2); - loc_wipe (&local->loc2); + loc_wipe(&local->loc2); - refreshed = local->selfheal.refreshed_layout; + refreshed = local->selfheal.refreshed_layout; - subvol = methods->layout_search (this, refreshed, local->loc.name); + subvol = methods->layout_search(this, refreshed, local->loc.name); - if (!subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in " - "layout for path=%s", local->loc.path); - local->op_errno = ENOENT; - goto err; - } + if (!subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, 
DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "no subvolume in " + "layout for path=%s", + local->loc.path); + local->op_errno = ENOENT; + goto err; + } - dht_mknod_wind_to_avail_subvol (frame, this, subvol, &local->loc, - local->rdev, local->mode, - local->umask, local->params); - return 0; + dht_mknod_wind_to_avail_subvol(frame, this, subvol, &local->loc, + local->rdev, local->mode, local->umask, + local->params); + return 0; err: - local->refresh_layout_unlock (frame, this, -1, 1); + local->refresh_layout_unlock(frame, this, -1, 1); - return 0; + return 0; } - int32_t -dht_mknod_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_DESTROY (frame); - return 0; + DHT_STACK_DESTROY(frame); + return 0; } int32_t -dht_mknod_finish (call_frame_t *frame, xlator_t *this, int op_ret, - int invoke_cbk) -{ - dht_local_t *local = NULL, *lock_local = NULL; - call_frame_t *lock_frame = NULL; - int lock_count = 0; - - local = frame->local; - lock_count = dht_lock_count (local->lock[0].layout.parent_layout.locks, - local->lock[0].layout.parent_layout.lk_count); - if (lock_count == 0) - goto done; - - lock_frame = copy_frame (frame); - if (lock_frame == NULL) { - goto done; - } - - lock_local = dht_local_init (lock_frame, &local->loc, NULL, - lock_frame->root->op); - if (lock_local == NULL) { - goto done; - } - - lock_local->lock[0].layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks; - lock_local->lock[0].layout.parent_layout.lk_count = local->lock[0].layout.parent_layout.lk_count; - - local->lock[0].layout.parent_layout.locks = NULL; - local->lock[0].layout.parent_layout.lk_count = 0; - - dht_unlock_inodelk (lock_frame, - lock_local->lock[0].layout.parent_layout.locks, - lock_local->lock[0].layout.parent_layout.lk_count, - dht_mknod_unlock_cbk); - lock_frame = NULL; +dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret, + int invoke_cbk) +{ + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + int lock_count = 0; + + local = frame->local; + lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks, + local->lock[0].layout.parent_layout.lk_count); + if (lock_count == 0) + goto done; + + lock_frame = copy_frame(frame); + if (lock_frame == NULL) { + goto done; + } + + lock_local = dht_local_init(lock_frame, &local->loc, NULL, + lock_frame->root->op); + if (lock_local == NULL) { + goto done; + } + + lock_local->lock[0] + .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks; + lock_local->lock[0].layout.parent_layout.lk_count = + local->lock[0].layout.parent_layout.lk_count; + + local->lock[0].layout.parent_layout.locks = NULL; + local->lock[0].layout.parent_layout.lk_count = 0; + + dht_unlock_inodelk(lock_frame, + lock_local->lock[0].layout.parent_layout.locks, + lock_local->lock[0].layout.parent_layout.lk_count, + dht_mknod_unlock_cbk); + lock_frame = NULL; done: - if (lock_frame != NULL) { - DHT_STACK_DESTROY (lock_frame); - } - - if (op_ret == 0) - return 0; + if (lock_frame != NULL) { + DHT_STACK_DESTROY(lock_frame); + } - DHT_STACK_UNWIND (mknod, frame, op_ret, local->op_errno, NULL, NULL, - NULL, NULL, NULL); + if (op_ret == 0) return 0; + + DHT_STACK_UNWIND(mknod, frame, op_ret, local->op_errno, NULL, NULL, NULL, + NULL, NULL); + return 0; } int32_t -dht_mknod_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, 
- int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (!local) { - goto err; - } + if (!local) { + goto err; + } - if (op_ret < 0) { - gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, - "mknod lock failed for file: %s", local->loc2.name); + if (op_ret < 0) { + gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "mknod lock failed for file: %s", local->loc2.name); - local->op_errno = op_errno; + local->op_errno = op_errno; - goto err; - } + goto err; + } - local->refresh_layout_unlock = dht_mknod_finish; + local->refresh_layout_unlock = dht_mknod_finish; - local->refresh_layout_done = dht_mknod_do; + local->refresh_layout_done = dht_mknod_do; - dht_refresh_layout (frame); + dht_refresh_layout(frame); - return 0; + return 0; err: - dht_mknod_finish (frame, this, -1, 0); - return 0; + dht_mknod_finish(frame, this, -1, 0); + return 0; } int32_t -dht_mknod_lock (call_frame_t *frame, xlator_t *subvol) +dht_mknod_lock(call_frame_t *frame, xlator_t *subvol) { - dht_local_t *local = NULL; - int count = 1, ret = -1; - dht_lock_t **lk_array = NULL; + dht_local_t *local = NULL; + int count = 1, ret = -1; + dht_lock_t **lk_array = NULL; - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err); + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err); - local = frame->local; + local = frame->local; - lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_pointer); + lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); - if (lk_array == NULL) - goto err; + if (lk_array == NULL) + goto err; - lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc, F_RDLCK, - DHT_LAYOUT_HEAL_DOMAIN, NULL, - IGNORE_ENOENT_ESTALE); + lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK, + DHT_LAYOUT_HEAL_DOMAIN, NULL, + IGNORE_ENOENT_ESTALE); - if (lk_array[0] == NULL) - goto err; + if (lk_array[0] == NULL) + goto err; - local->lock[0].layout.parent_layout.locks = lk_array; - local->lock[0].layout.parent_layout.lk_count = count; + local->lock[0].layout.parent_layout.locks = lk_array; + local->lock[0].layout.parent_layout.lk_count = count; - ret = dht_blocking_inodelk (frame, lk_array, count, dht_mknod_lock_cbk); + ret = dht_blocking_inodelk(frame, lk_array, count, dht_mknod_lock_cbk); - if (ret < 0) { - local->lock[0].layout.parent_layout.locks = NULL; - local->lock[0].layout.parent_layout.lk_count = 0; - goto err; - } + if (ret < 0) { + local->lock[0].layout.parent_layout.locks = NULL; + local->lock[0].layout.parent_layout.lk_count = 0; + goto err; + } - return 0; + return 0; err: - if (lk_array != NULL) { - dht_lock_array_free (lk_array, count); - GF_FREE (lk_array); - } + if (lk_array != NULL) { + dht_lock_array_free(lk_array, count); + GF_FREE(lk_array); + } - return -1; + return -1; } int -dht_refresh_parent_layout_resume (call_frame_t *frame, xlator_t *this, int ret, - int invoke_cbk) +dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret, + int invoke_cbk) { - dht_local_t *local = NULL, *parent_local = NULL; - call_stub_t *stub = NULL; - call_frame_t *parent_frame = NULL; + dht_local_t *local = NULL, *parent_local = NULL; + call_stub_t *stub = NULL; + call_frame_t *parent_frame = NULL; - local = frame->local; + local = 
frame->local; - stub = local->stub; - local->stub = NULL; + stub = local->stub; + local->stub = NULL; - parent_frame = stub->frame; - parent_local = parent_frame->local; + parent_frame = stub->frame; + parent_local = parent_frame->local; - if (ret < 0) { - parent_local->op_ret = -1; - parent_local->op_errno = local->op_errno - ? local->op_errno : EIO; - } else { - parent_local->op_ret = 0; - } + if (ret < 0) { + parent_local->op_ret = -1; + parent_local->op_errno = local->op_errno ? local->op_errno : EIO; + } else { + parent_local->op_ret = 0; + } - call_resume (stub); + call_resume(stub); - DHT_STACK_DESTROY (frame); + DHT_STACK_DESTROY(frame); - return 0; + return 0; } - int -dht_refresh_parent_layout_done (call_frame_t *frame) +dht_refresh_parent_layout_done(call_frame_t *frame) { - dht_local_t *local = NULL; - int ret = 0; + dht_local_t *local = NULL; + int ret = 0; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - ret = -1; - goto resume; - } + if (local->op_ret < 0) { + ret = -1; + goto resume; + } - dht_layout_set (frame->this, local->loc.inode, - local->selfheal.refreshed_layout); + dht_layout_set(frame->this, local->loc.inode, + local->selfheal.refreshed_layout); resume: - dht_refresh_parent_layout_resume (frame, frame->this, ret, 1); - return 0; + dht_refresh_parent_layout_resume(frame, frame->this, ret, 1); + return 0; } - int -dht_handle_parent_layout_change (xlator_t *this, call_stub_t *stub) +dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub) { - call_frame_t *refresh_frame = NULL, *frame = NULL; - dht_local_t *refresh_local = NULL, *local = NULL; + call_frame_t *refresh_frame = NULL, *frame = NULL; + dht_local_t *refresh_local = NULL, *local = NULL; - frame = stub->frame; - local = frame->local; + frame = stub->frame; + local = frame->local; - refresh_frame = copy_frame (frame); - refresh_local = dht_local_init (refresh_frame, NULL, NULL, - stub->fop); + refresh_frame = copy_frame(frame); + refresh_local = dht_local_init(refresh_frame, NULL, NULL, stub->fop); - refresh_local->loc.inode = inode_ref (local->loc.parent); - gf_uuid_copy (refresh_local->loc.gfid, local->loc.parent->gfid); + refresh_local->loc.inode = inode_ref(local->loc.parent); + gf_uuid_copy(refresh_local->loc.gfid, local->loc.parent->gfid); - refresh_local->stub = stub; + refresh_local->stub = stub; - refresh_local->refresh_layout_unlock = dht_refresh_parent_layout_resume; - refresh_local->refresh_layout_done = dht_refresh_parent_layout_done; + refresh_local->refresh_layout_unlock = dht_refresh_parent_layout_resume; + refresh_local->refresh_layout_done = dht_refresh_parent_layout_done; - dht_refresh_layout (refresh_frame); - return 0; + dht_refresh_layout(refresh_frame); + return 0; } int32_t -dht_call_mkdir_stub (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - call_stub_t *stub = NULL; + dht_local_t *local = NULL; + call_stub_t *stub = NULL; - local = frame->local; - stub = local->stub; - local->stub = NULL; + local = frame->local; + stub = local->stub; + local->stub = NULL; - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = op_errno; - } else { - local->op_ret = 0; - } + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; + } else { + local->op_ret = 0; + } - call_resume (stub); + call_resume(stub); - return 0; + return 0; } 
int32_t -dht_guard_parent_layout_and_namespace (xlator_t *subvol, call_stub_t *stub) +dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub) { - dht_local_t *local = NULL; - int ret = -1; - loc_t *loc = NULL; - xlator_t *hashed_subvol = NULL, *this = NULL;; - call_frame_t *frame = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - int32_t *parent_disk_layout = NULL; - dht_layout_t *parent_layout = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int ret = -1; + loc_t *loc = NULL; + xlator_t *hashed_subvol = NULL, *this = NULL; + ; + call_frame_t *frame = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int32_t *parent_disk_layout = NULL; + dht_layout_t *parent_layout = NULL; + dht_conf_t *conf = NULL; - GF_VALIDATE_OR_GOTO ("dht", stub, err); + GF_VALIDATE_OR_GOTO("dht", stub, err); - frame = stub->frame; - this = frame->this; + frame = stub->frame; + this = frame->this; - conf = this->private; + conf = this->private; - local = frame->local; + local = frame->local; - local->stub = stub; + local->stub = stub; - /* TODO: recheck whether we should lock on src or dst if we do similar - * stale layout checks for rename. - */ - loc = &stub->args.loc; + /* TODO: recheck whether we should lock on src or dst if we do similar + * stale layout checks for rename. + */ + loc = &stub->args.loc; - gf_uuid_unparse (loc->parent->gfid, pgfid); + gf_uuid_unparse(loc->parent->gfid, pgfid); + if (local->params == NULL) { + local->params = dict_new(); if (local->params == NULL) { - local->params = dict_new (); - if (local->params == NULL) { - local->op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "%s (%s/%s) (path: %s): " - "dict allocation failed", - gf_fop_list[stub->fop], - pgfid, loc->name, loc->path); - goto err; - } - } - - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (hashed_subvol == NULL) { - local->op_errno = EINVAL; - - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "%s (%s/%s) (path: %s): " - "hashed subvolume not found", gf_fop_list[stub->fop], - pgfid, loc->name, loc->path); - goto err; - } - - parent_layout = dht_layout_get (this, loc->parent); - - ret = dht_disk_layout_extract_for_subvol (this, parent_layout, - hashed_subvol, - &parent_disk_layout); - if (ret == -1) { - local->op_errno = EINVAL; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "%s (%s/%s) (path: %s): " - "extracting in-memory layout of parent failed. ", - gf_fop_list[stub->fop], pgfid, loc->name, loc->path); - goto err; - } - - memcpy ((void *)local->parent_disk_layout, (void *)parent_disk_layout, - sizeof (local->parent_disk_layout)); - - dht_layout_unref (this, parent_layout); - parent_layout = NULL; - - ret = dict_set_str (local->params, GF_PREOP_PARENT_KEY, - conf->xattr_name); - if (ret < 0) { - local->op_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "%s (%s/%s) (path: %s): " - "setting %s key in params dictionary failed. ", - gf_fop_list[stub->fop], pgfid, loc->name, loc->path, - GF_PREOP_PARENT_KEY); - goto err; - } - - ret = dict_set_bin (local->params, conf->xattr_name, parent_disk_layout, - 4 * 4); - if (ret < 0) { - local->op_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "%s (%s/%s) (path: %s): " - "setting parent-layout in params dictionary failed. 
", - gf_fop_list[stub->fop], pgfid, loc->name, loc->path); - goto err; - } - - parent_disk_layout = NULL; - local->hashed_subvol = hashed_subvol; - - local->current = &local->lock[0]; - ret = dht_protect_namespace (frame, loc, hashed_subvol, - &local->current->ns, dht_call_mkdir_stub); - if (ret < 0) - goto err; - - return 0; + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "dict allocation failed", + gf_fop_list[stub->fop], pgfid, loc->name, loc->path); + goto err; + } + } + + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (hashed_subvol == NULL) { + local->op_errno = EINVAL; + + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "hashed subvolume not found", + gf_fop_list[stub->fop], pgfid, loc->name, loc->path); + goto err; + } + + parent_layout = dht_layout_get(this, loc->parent); + + ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, + &parent_disk_layout); + if (ret == -1) { + local->op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "extracting in-memory layout of parent failed. ", + gf_fop_list[stub->fop], pgfid, loc->name, loc->path); + goto err; + } + + memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout, + sizeof(local->parent_disk_layout)); + + dht_layout_unref(this, parent_layout); + parent_layout = NULL; + + ret = dict_set_str(local->params, GF_PREOP_PARENT_KEY, conf->xattr_name); + if (ret < 0) { + local->op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting %s key in params dictionary failed. ", + gf_fop_list[stub->fop], pgfid, loc->name, loc->path, + GF_PREOP_PARENT_KEY); + goto err; + } + + ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout, + 4 * 4); + if (ret < 0) { + local->op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting parent-layout in params dictionary failed. ", + gf_fop_list[stub->fop], pgfid, loc->name, loc->path); + goto err; + } + + parent_disk_layout = NULL; + local->hashed_subvol = hashed_subvol; + + local->current = &local->lock[0]; + ret = dht_protect_namespace(frame, loc, hashed_subvol, &local->current->ns, + dht_call_mkdir_stub); + if (ret < 0) + goto err; + + return 0; err: - if (parent_disk_layout != NULL) - GF_FREE (parent_disk_layout); - - if (parent_layout != NULL) - dht_layout_unref (this, parent_layout); - - return -1; -} - -int -dht_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - int i = 0; - int ret = 0; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - conf = this->private; - - dht_get_du_info (frame, this, loc); - - local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - loc->path); - op_errno = EIO; - goto err; - } - - /* Post remove-brick, the client layout may not be in sync with - * disk layout because of lack of lookup. 
Hence,a mknod call - * may fall on the decommissioned brick. Hence, if the - * hashed_subvol is part of decommissioned bricks list, do a - * lookup on parent dir. If a fix-layout is already done by the - * remove-brick process, the parent directory layout will be in - * sync with that of the disk. If fix-layout is still ending - * on the parent directory, we can let the file get created on - * the decommissioned brick which will be eventually migrated to - * non-decommissioned brick based on the new layout. - */ - - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] && - conf->decommissioned_bricks[i] == subvol) { - - gf_msg_debug (this->name, 0, "hashed subvol:%s is " - "part of decommission brick list for " - "file: %s", subvol->name, loc->path); - - /* dht_refresh_layout needs directory info in - * local->loc. Hence, storing the parent_loc in - * local->loc and storing the create context in - * local->loc2. We will restore this information - * in dht_creation do */ - - ret = loc_copy (&local->loc2, &local->loc); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - "loc_copy failed %s", loc->path); - - goto err; - } - - local->params = dict_ref (params); - local->rdev = rdev; - local->mode = mode; - local->umask = umask; - - loc_wipe (&local->loc); - - ret = dht_build_parent_loc (this, &local->loc, loc, - &op_errno); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_LOC_FAILED, - "parent loc build failed"); - goto err; - } - - ret = dht_mknod_lock (frame, subvol); - - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INODE_LK_ERROR, - "locking parent failed"); - goto err; - } - - goto done; - } - } - } - - dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc, rdev, mode, - umask, params); + if (parent_disk_layout != NULL) + GF_FREE(parent_disk_layout); -done: - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (mknod, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL); + if (parent_layout != NULL) + dht_layout_unref(this, parent_layout); - return 0; + return -1; } - int -dht_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc, mode_t umask, dict_t *params) +dht_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *params) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - loc->path); - op_errno = EIO; - goto err; - } - - gf_msg_trace (this->name, 0, - "creating %s on %s", loc->path, subvol->name); - - STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol, - subvol->fops->symlink, linkname, loc, umask, - params); + xlator_t *subvol = NULL; + int op_errno = -1; + int i = 0; + int ret = 0; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; - return 0; - -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (link, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL); - - return 0; -} + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + conf = this->private; -int -dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - xlator_t *cached_subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); + dht_get_du_info(frame, this, loc); - local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK); - if (!local) { - op_errno = ENOMEM; + local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD); + if (!local) { + op_errno = ENOMEM; + goto err; + } - goto err; - } + subvol = dht_subvol_get_hashed(this, loc); + if (!subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + loc->path); + op_errno = EIO; + goto err; + } - cached_subvol = local->cached_subvol; - if (!cached_subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + /* Post remove-brick, the client layout may not be in sync with + * disk layout because of lack of lookup. Hence,a mknod call + * may fall on the decommissioned brick. Hence, if the + * hashed_subvol is part of decommissioned bricks list, do a + * lookup on parent dir. If a fix-layout is already done by the + * remove-brick process, the parent directory layout will be in + * sync with that of the disk. If fix-layout is still ending + * on the parent directory, we can let the file get created on + * the decommissioned brick which will be eventually migrated to + * non-decommissioned brick based on the new layout. + */ - local->flags = xflag; - STACK_WIND_COOKIE (frame, dht_unlink_cbk, cached_subvol, cached_subvol, - cached_subvol->fops->unlink, loc, xflag, xdata); + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] && + conf->decommissioned_bricks[i] == subvol) { + gf_msg_debug(this->name, 0, + "hashed subvol:%s is " + "part of decommission brick list for " + "file: %s", + subvol->name, loc->path); + + /* dht_refresh_layout needs directory info in + * local->loc. Hence, storing the parent_loc in + * local->loc and storing the create context in + * local->loc2. We will restore this information + * in dht_creation do */ + + ret = loc_copy(&local->loc2, &local->loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "loc_copy failed %s", loc->path); - return 0; -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + goto err; + } - return 0; -} + local->params = dict_ref(params); + local->rdev = rdev; + local->mode = mode; + local->umask = umask; -int -dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - int ret = -1; - gf_boolean_t stbuf_merged = _gf_false; - xlator_t *subvol = NULL; - call_frame_t *cleanup_frame = NULL; - dht_local_t *cleanup_local = NULL; + loc_wipe(&local->loc); - local = frame->local; + ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); - if (op_ret == -1) { - /* Remove the linkto if exists */ - if (local->linked) { - cleanup_frame = create_frame (this, this->ctx->pool); - if (cleanup_frame) { - cleanup_local = dht_local_init (cleanup_frame, - &local->loc2, - NULL, 0); - if (!cleanup_local || !local->link_subvol) { - DHT_STACK_DESTROY (cleanup_frame); - goto out; - } - cleanup_local->link_subvol = local->link_subvol; - FRAME_SU_DO (cleanup_frame, dht_local_t); - ret = synctask_new (this->ctx->env, - dht_remove_stale_linkto, - dht_remove_stale_linkto_cbk, - cleanup_frame, - cleanup_frame); - } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, + "parent loc build failed"); + goto err; } - /* No continuation on DHT inode missing errors, as we should - * then have a good stbuf that states P2 happened. We would - * get inode missing if, the file completed migrated between - * the lookup and the link call */ - goto out; - } - /* Update parent on success, even if P1/2 checks are positive. - * The second call on success will further update the parent */ - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - preparent, 0); - dht_inode_ctx_time_update (local->loc.parent, this, - postparent, 1); - } - - /* Update linkto attrs, if this is the first call and non-P2, - * if we detect P2 then we need to trust the attrs from the - * second call, not the first */ - if (local->linked == _gf_true && - ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2 (stbuf)) - || (local->call_cnt != 1 && - IS_DHT_MIGRATION_PHASE2 (&local->stbuf)))) { - dht_iatt_merge (this, &local->stbuf, stbuf); - stbuf_merged = _gf_true; - dht_linkfile_attr_heal (frame, this); - } - - /* No further P1/2 checks if we are in the second iteration of - * the call */ - if (local->call_cnt != 1) { - goto out; - } else { - /* Preserve the return values, in case the migration decides - * to recreate the link on the same subvol that the current - * hased for the link was created on. 
*/ - dht_iatt_merge (this, &local->preparent, preparent); - dht_iatt_merge (this, &local->postparent, postparent); - if (!stbuf_merged) { - dht_iatt_merge (this, &local->stbuf, stbuf); - stbuf_merged = _gf_true; + ret = dht_mknod_lock(frame, subvol); + + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "locking parent failed"); + goto err; } - local->inode = inode_ref (inode); + goto done; + } } + } - local->op_ret = op_ret; - local->op_errno = op_errno; - local->rebalance.target_op_fn = dht_link2; - dht_set_local_rebalance (this, local, stbuf, preparent, - postparent, xdata); - - /* Check if the rebalance phase2 is true */ - if (IS_DHT_MIGRATION_PHASE2 (stbuf)) { - ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL, - &subvol); - if (!subvol) { - /* Phase 2 of migration */ - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } else { - dht_link2 (this, subvol, frame, 0); - return 0; - } - } + dht_mknod_wind_to_avail_subvol(frame, this, subvol, loc, rdev, mode, umask, + params); - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (stbuf)) { - ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL, - &subvol); - if (subvol) { - dht_link2 (this, subvol, frame, 0); - return 0; - } - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; - } -out: - DHT_STRIP_PHASE1_FLAGS (stbuf); +done: + return 0; - dht_set_fixed_dir_stat (preparent); - dht_set_fixed_dir_stat (postparent); - DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, - preparent, postparent, NULL); +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - int -dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *params) { - dht_local_t *local = NULL; - int op_errno = EINVAL; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - local = frame->local; - if (!local) - goto err; + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); - op_errno = local->op_errno; + local = dht_local_init(frame, loc, NULL, GF_FOP_SYMLINK); + if (!local) { + op_errno = ENOMEM; + goto err; + } - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. 
- */ - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); + subvol = dht_subvol_get_hashed(this, loc); + if (!subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + loc->path); + op_errno = EIO; + goto err; + } - DHT_STACK_UNWIND (link, frame, local->op_ret, op_errno, - local->inode, - &local->stbuf, &local->preparent, - &local->postparent, NULL); - return 0; - } + gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name); - if (subvol == NULL) { - op_errno = EINVAL; - goto err; - } + STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol, + subvol->fops->symlink, linkname, loc, umask, params); - /* Second call to create link file could result in EEXIST as the - * first call created the linkto in the currently - * migrating subvol, which could be the new hashed subvol */ - if (local->link_subvol == subvol) { - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); - DHT_STACK_UNWIND (link, frame, 0, 0, local->inode, - &local->stbuf, &local->preparent, - &local->postparent, NULL); + return 0; - return 0; - } +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + + return 0; +} - local->call_cnt = 2; +int +dht_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + xlator_t *cached_subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link, - &local->loc, &local->loc2, local->xattr_req); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); - return 0; + local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK); + if (!local) { + op_errno = ENOMEM; + + goto err; + } + + cached_subvol = local->cached_subvol; + if (!cached_subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + local->flags = xflag; + STACK_WIND_COOKIE(frame, dht_unlink_cbk, cached_subvol, cached_subvol, + cached_subvol->fops->unlink, loc, xflag, xdata); + + return 0; err: - DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); + + return 0; +} + +int +dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + dht_local_t *local = NULL; + int ret = -1; + gf_boolean_t stbuf_merged = _gf_false; + xlator_t *subvol = NULL; + call_frame_t *cleanup_frame = NULL; + dht_local_t *cleanup_local = NULL; + + local = frame->local; + + if (op_ret == -1) { + /* Remove the linkto if exists */ + if (local->linked) { + cleanup_frame = create_frame(this, this->ctx->pool); + if (cleanup_frame) { + cleanup_local = dht_local_init(cleanup_frame, &local->loc2, + NULL, 0); + if (!cleanup_local || !local->link_subvol) { + DHT_STACK_DESTROY(cleanup_frame); + goto out; + } + cleanup_local->link_subvol = local->link_subvol; + FRAME_SU_DO(cleanup_frame, dht_local_t); + ret = synctask_new(this->ctx->env, dht_remove_stale_linkto, + dht_remove_stale_linkto_cbk, cleanup_frame, + cleanup_frame); + } + } + /* No continuation on DHT inode missing errors, as we should + * then have a good stbuf that states P2 happened. 
We would + * get inode missing if, the file completed migrated between + * the lookup and the link call */ + goto out; + } + + /* Update parent on success, even if P1/2 checks are positive. + * The second call on success will further update the parent */ + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); + dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); + } + + /* Update linkto attrs, if this is the first call and non-P2, + * if we detect P2 then we need to trust the attrs from the + * second call, not the first */ + if (local->linked == _gf_true && + ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2(stbuf)) || + (local->call_cnt != 1 && IS_DHT_MIGRATION_PHASE2(&local->stbuf)))) { + dht_iatt_merge(this, &local->stbuf, stbuf); + stbuf_merged = _gf_true; + dht_linkfile_attr_heal(frame, this); + } + + /* No further P1/2 checks if we are in the second iteration of + * the call */ + if (local->call_cnt != 1) { + goto out; + } else { + /* Preserve the return values, in case the migration decides + * to recreate the link on the same subvol that the current + * hased for the link was created on. */ + dht_iatt_merge(this, &local->preparent, preparent); + dht_iatt_merge(this, &local->postparent, postparent); + if (!stbuf_merged) { + dht_iatt_merge(this, &local->stbuf, stbuf); + stbuf_merged = _gf_true; + } + + local->inode = inode_ref(inode); + } + + local->op_ret = op_ret; + local->op_errno = op_errno; + local->rebalance.target_op_fn = dht_link2; + dht_set_local_rebalance(this, local, stbuf, preparent, postparent, xdata); + + /* Check if the rebalance phase2 is true */ + if (IS_DHT_MIGRATION_PHASE2(stbuf)) { + ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol); + if (!subvol) { + /* Phase 2 of migration */ + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } else { + dht_link2(this, subvol, frame, 0); + return 0; + } + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(stbuf)) { + ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol); + if (subvol) { + dht_link2(this, subvol, frame, 0); + return 0; + } + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } +out: + DHT_STRIP_PHASE1_FLAGS(stbuf); - return 0; + dht_set_fixed_dir_stat(preparent); + dht_set_fixed_dir_stat(postparent); + DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, NULL); + + return 0; } int -dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - xlator_t *srcvol = NULL; + dht_local_t *local = NULL; + int op_errno = EINVAL; - if (op_ret == -1) - goto err; + local = frame->local; + if (!local) + goto err; - local = frame->local; - srcvol = local->linkfile.srcvol; + op_errno = local->op_errno; - STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link, - &local->loc, &local->loc2, local->xattr_req); + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); + DHT_STACK_UNWIND(link, frame, local->op_ret, op_errno, local->inode, + &local->stbuf, &local->preparent, &local->postparent, + NULL); return 0; + } -err: - DHT_STRIP_PHASE1_FLAGS (stbuf); - dht_set_fixed_dir_stat (preparent); - dht_set_fixed_dir_stat (postparent); - DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, - postparent, xdata); + if (subvol == NULL) { + op_errno = EINVAL; + goto err; + } + + /* Second call to create link file could result in EEXIST as the + * first call created the linkto in the currently + * migrating subvol, which could be the new hashed subvol */ + if (local->link_subvol == subvol) { + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); + DHT_STACK_UNWIND(link, frame, 0, 0, local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); return 0; -} + } + local->call_cnt = 2; -int -dht_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - int op_errno = -1; - int ret = -1; - dht_local_t *local = NULL; + STACK_WIND(frame, dht_link_cbk, subvol, subvol->fops->link, &local->loc, + &local->loc2, local->xattr_req); - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (oldloc, err); - VALIDATE_OR_GOTO (newloc, err); + return 0; +err: + DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK); - if (!local) { - op_errno = ENOMEM; + return 0; +} - goto err; - } - local->call_cnt = 1; +int +dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + dht_local_t *local = NULL; + xlator_t *srcvol = NULL; - cached_subvol = local->cached_subvol; - if (!cached_subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", oldloc->path); - op_errno = ENOENT; - goto err; - } + if (op_ret == -1) + goto err; - hashed_subvol = dht_subvol_get_hashed (this, newloc); - if (!hashed_subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - newloc->path); - op_errno = EIO; - goto err; - } + local = frame->local; + srcvol = local->linkfile.srcvol; - ret = loc_copy (&local->loc2, newloc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); + STACK_WIND(frame, dht_link_cbk, srcvol, srcvol->fops->link, &local->loc, + &local->loc2, local->xattr_req); - if (hashed_subvol != cached_subvol) { - gf_uuid_copy (local->gfid, oldloc->inode->gfid); - dht_linkfile_create (frame, dht_link_linkfile_cbk, this, - cached_subvol, hashed_subvol, newloc); - } else { - STACK_WIND (frame, dht_link_cbk, - cached_subvol, cached_subvol->fops->link, - oldloc, newloc, xdata); - } + return 0; - return 0; +err: + DHT_STRIP_PHASE1_FLAGS(stbuf); + dht_set_fixed_dir_stat(preparent); + dht_set_fixed_dir_stat(postparent); + DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, xdata); + + return 0; +} + +int +dht_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local 
= NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(oldloc, err); + VALIDATE_OR_GOTO(newloc, err); + + local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK); + if (!local) { + op_errno = ENOMEM; + + goto err; + } + local->call_cnt = 1; + + cached_subvol = local->cached_subvol; + if (!cached_subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + oldloc->path); + op_errno = ENOENT; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed(this, newloc); + if (!hashed_subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + newloc->path); + op_errno = EIO; + goto err; + } + + ret = loc_copy(&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); + + if (hashed_subvol != cached_subvol) { + gf_uuid_copy(local->gfid, oldloc->inode->gfid); + dht_linkfile_create(frame, dht_link_linkfile_cbk, this, cached_subvol, + hashed_subvol, newloc); + } else { + STACK_WIND(frame, dht_link_cbk, cached_subvol, + cached_subvol->fops->link, oldloc, newloc, xdata); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - int -dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - fd_t *fd, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - xlator_t *prev = NULL; - int ret = -1; - dht_local_t *local = NULL; - - local = frame->local; - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } + xlator_t *prev = NULL; + int ret = -1; + dht_local_t *local = NULL; - if (op_ret == -1) - goto out; + local = frame->local; + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } - prev = cookie; + if (op_ret == -1) + goto out; - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - preparent, 0); + prev = cookie; - dht_inode_ctx_time_update (local->loc.parent, this, - postparent, 1); - } + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); - ret = dht_fd_ctx_set (this, fd, prev); - if (ret != 0) { - gf_msg_debug (this->name, 0, "Possible fd leak. " - "Could not set fd ctx for subvol %s", - prev->name); - } + dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); + } + ret = dht_fd_ctx_set(this, fd, prev); + if (ret != 0) { + gf_msg_debug(this->name, 0, + "Possible fd leak. 
" + "Could not set fd ctx for subvol %s", + prev->name); + } - ret = dht_layout_preset (this, prev, inode); - if (ret != 0) { - gf_msg_debug (this->name, 0, - "could not set preset layout for subvol %s", - prev->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + ret = dht_layout_preset(this, prev, inode); + if (ret != 0) { + gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s", + prev->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } - local->op_errno = op_errno; + local->op_errno = op_errno; - if (local->linked == _gf_true) { - local->stbuf = *stbuf; - dht_linkfile_attr_heal (frame, this); - } + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal(frame, this); + } out: - DHT_STRIP_PHASE1_FLAGS (stbuf); - dht_set_fixed_dir_stat (preparent); - dht_set_fixed_dir_stat (postparent); + DHT_STRIP_PHASE1_FLAGS(stbuf); + dht_set_fixed_dir_stat(preparent); + dht_set_fixed_dir_stat(postparent); - if (local && local->lock[0].layout.parent_layout.locks) { - /* store op_errno for failure case*/ - local->op_errno = op_errno; - local->refresh_layout_unlock (frame, this, op_ret, 1); + if (local && local->lock[0].layout.parent_layout.locks) { + /* store op_errno for failure case*/ + local->op_errno = op_errno; + local->refresh_layout_unlock(frame, this, op_ret, 1); - if (op_ret == 0) { - DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, - inode, stbuf, preparent, postparent, - xdata); - } - } else { - DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, - stbuf, preparent, postparent, xdata); + if (op_ret == 0) { + DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); } - return 0; + } else { + DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); + } + return 0; } int -dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; - local = frame->local; - if (!local) { - op_errno = EINVAL; - goto err; - } + local = frame->local; + if (!local) { + op_errno = EINVAL; + goto err; + } - if (op_ret == -1) { - local->op_errno = op_errno; - goto err; - } + if (op_ret == -1) { + local->op_errno = op_errno; + goto err; + } - conf = this->private; - if (!conf) { - local->op_errno = EINVAL; - op_errno = EINVAL; - goto err; - } + conf = this->private; + if (!conf) { + local->op_errno = EINVAL; + op_errno = EINVAL; + goto err; + } - cached_subvol = local->cached_subvol; + cached_subvol = local->cached_subvol; - if (local->params) { - dict_del (local->params, conf->link_xattr_name); - dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY); - } + if (local->params) { + dict_del(local->params, conf->link_xattr_name); + dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); + } - STACK_WIND_COOKIE (frame, dht_create_cbk, cached_subvol, - cached_subvol, cached_subvol->fops->create, - &local->loc, local->flags, local->mode, - local->umask, local->fd, local->params); + STACK_WIND_COOKIE(frame, 
dht_create_cbk, cached_subvol, cached_subvol, + cached_subvol->fops->create, &local->loc, local->flags, + local->mode, local->umask, local->fd, local->params); - return 0; + return 0; err: - if (local && local->lock[0].layout.parent_layout.locks) { - local->refresh_layout_unlock (frame, this, -1, 1); - } else { - DHT_STACK_UNWIND (create, frame, -1, - op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); - } - return 0; + if (local && local->lock[0].layout.parent_layout.locks) { + local->refresh_layout_unlock(frame, this, -1, 1); + } else { + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + } + return 0; } int -dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, - xlator_t *subvol, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, - dict_t *params) +dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, + dict_t *params) { - dht_local_t *local = NULL; - xlator_t *avail_subvol = NULL; + dht_local_t *local = NULL; + xlator_t *avail_subvol = NULL; - local = frame->local; + local = frame->local; - if (!dht_is_subvol_filled (this, subvol)) { - gf_msg_debug (this->name, 0, - "creating %s on %s", loc->path, - subvol->name); + if (!dht_is_subvol_filled(this, subvol)) { + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); - STACK_WIND_COOKIE (frame, dht_create_cbk, subvol, - subvol, subvol->fops->create, - loc, flags, mode, umask, fd, params); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); - } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol, local); + } else { + avail_subvol = dht_free_disk_available_subvol(this, subvol, local); - if (avail_subvol != subvol) { - local->params = dict_ref (params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - local->cached_subvol = avail_subvol; - local->hashed_subvol = subvol; + if (avail_subvol != subvol) { + local->params = dict_ref(params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + local->cached_subvol = avail_subvol; + local->hashed_subvol = subvol; - gf_msg_debug (this->name, 0, - "creating %s on %s (link at %s)", loc->path, - avail_subvol->name, subvol->name); + gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", + loc->path, avail_subvol->name, subvol->name); - dht_linkfile_create (frame, dht_create_linkfile_create_cbk, - this, avail_subvol, subvol, loc); + dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this, + avail_subvol, subvol, loc); - goto out; - } + goto out; + } - gf_msg_debug (this->name, 0, - "creating %s on %s", loc->path, subvol->name); + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); - STACK_WIND_COOKIE (frame, dht_create_cbk, subvol, - subvol, subvol->fops->create, - loc, flags, mode, umask, fd, params); - } + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); + } out: - return 0; + return 0; } int -dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child, - int32_t *op_errno) +dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child, + int32_t *op_errno) { - inode_table_t *table = NULL; - int ret = -1; + inode_table_t *table = NULL; + int ret = -1; - if (!parent || !child) { - if (op_errno) - *op_errno = EINVAL; - goto out; - } + if (!parent || !child) { + if 
(op_errno) + *op_errno = EINVAL; + goto out; + } - if (child->parent) { - parent->inode = inode_ref (child->parent); - if (!parent->inode) { - if (op_errno) - *op_errno = EINVAL; - goto out; - } + if (child->parent) { + parent->inode = inode_ref(child->parent); + if (!parent->inode) { + if (op_errno) + *op_errno = EINVAL; + goto out; + } - gf_uuid_copy (parent->gfid, child->pargfid); + gf_uuid_copy(parent->gfid, child->pargfid); - ret = 0; + ret = 0; - goto out; - } else { - if (gf_uuid_is_null (child->pargfid)) { - if (op_errno) - *op_errno = EINVAL; - goto out; - } + goto out; + } else { + if (gf_uuid_is_null(child->pargfid)) { + if (op_errno) + *op_errno = EINVAL; + goto out; + } - table = this->itable; + table = this->itable; - if (!table) { - if (op_errno) { - *op_errno = EINVAL; - goto out; - } - } + if (!table) { + if (op_errno) { + *op_errno = EINVAL; + goto out; + } + } - parent->inode = inode_find (table, child->pargfid); + parent->inode = inode_find(table, child->pargfid); - if (!parent->inode) { - if (op_errno) { - *op_errno = EINVAL; - goto out; - } - } + if (!parent->inode) { + if (op_errno) { + *op_errno = EINVAL; + goto out; + } + } - gf_uuid_copy (parent->gfid, child->pargfid); + gf_uuid_copy(parent->gfid, child->pargfid); - ret = 0; - } + ret = 0; + } out: - return ret; + return ret; } - int32_t -dht_create_do (call_frame_t *frame) +dht_create_do(call_frame_t *frame) { - dht_local_t *local = NULL; - dht_layout_t *refreshed = NULL; - xlator_t *subvol = NULL; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; + dht_local_t *local = NULL; + dht_layout_t *refreshed = NULL; + xlator_t *subvol = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; - local = frame->local; + local = frame->local; - this = THIS; + this = THIS; - conf = this->private; + conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, err); + GF_VALIDATE_OR_GOTO(this->name, conf, err); - methods = &(conf->methods); + methods = &(conf->methods); - /* We don't need parent_loc anymore */ - loc_wipe (&local->loc); + /* We don't need parent_loc anymore */ + loc_wipe(&local->loc); - loc_copy (&local->loc, &local->loc2); + loc_copy(&local->loc, &local->loc2); - loc_wipe (&local->loc2); + loc_wipe(&local->loc2); - refreshed = local->selfheal.refreshed_layout; + refreshed = local->selfheal.refreshed_layout; - subvol = methods->layout_search (this, refreshed, local->loc.name); + subvol = methods->layout_search(this, refreshed, local->loc.name); - if (!subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in " - "layout for path=%s", local->loc.path); - local->op_errno = ENOENT; - goto err; - } + if (!subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "no subvolume in " + "layout for path=%s", + local->loc.path); + local->op_errno = ENOENT; + goto err; + } - dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc, - local->flags, local->mode, - local->umask, local->fd, local->params); - return 0; + dht_create_wind_to_avail_subvol(frame, this, subvol, &local->loc, + local->flags, local->mode, local->umask, + local->fd, local->params); + return 0; err: - local->refresh_layout_unlock (frame, this, -1, 1); + local->refresh_layout_unlock(frame, this, -1, 1); - return 0; + return 0; } int32_t -dht_create_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_create_unlock_cbk(call_frame_t *frame, 
void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_DESTROY (frame); - return 0; + DHT_STACK_DESTROY(frame); + return 0; } int32_t -dht_create_finish (call_frame_t *frame, xlator_t *this, int op_ret, - int invoke_cbk) +dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret, + int invoke_cbk) { - dht_local_t *local = NULL, *lock_local = NULL; - call_frame_t *lock_frame = NULL; - int lock_count = 0; - - local = frame->local; - lock_count = dht_lock_count (local->lock[0].layout.parent_layout.locks, - local->lock[0].layout.parent_layout.lk_count); - if (lock_count == 0) - goto done; - - lock_frame = copy_frame (frame); - if (lock_frame == NULL) { - goto done; - } - - lock_local = dht_local_init (lock_frame, &local->loc, NULL, - lock_frame->root->op); - if (lock_local == NULL) { - goto done; - } - - lock_local->lock[0].layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks; - lock_local->lock[0].layout.parent_layout.lk_count = local->lock[0].layout.parent_layout.lk_count; - - local->lock[0].layout.parent_layout.locks = NULL; - local->lock[0].layout.parent_layout.lk_count = 0; - - dht_unlock_inodelk (lock_frame, - lock_local->lock[0].layout.parent_layout.locks, - lock_local->lock[0].layout.parent_layout.lk_count, - dht_create_unlock_cbk); - lock_frame = NULL; + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + int lock_count = 0; + + local = frame->local; + lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks, + local->lock[0].layout.parent_layout.lk_count); + if (lock_count == 0) + goto done; + + lock_frame = copy_frame(frame); + if (lock_frame == NULL) { + goto done; + } + + lock_local = dht_local_init(lock_frame, &local->loc, NULL, + lock_frame->root->op); + if (lock_local == NULL) { + goto done; + } + + lock_local->lock[0] + .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks; + lock_local->lock[0].layout.parent_layout.lk_count = + local->lock[0].layout.parent_layout.lk_count; + + local->lock[0].layout.parent_layout.locks = NULL; + local->lock[0].layout.parent_layout.lk_count = 0; + + dht_unlock_inodelk(lock_frame, + lock_local->lock[0].layout.parent_layout.locks, + lock_local->lock[0].layout.parent_layout.lk_count, + dht_create_unlock_cbk); + lock_frame = NULL; done: - if (lock_frame != NULL) { - DHT_STACK_DESTROY (lock_frame); - } - - if (op_ret == 0) - return 0; + if (lock_frame != NULL) { + DHT_STACK_DESTROY(lock_frame); + } - DHT_STACK_UNWIND (create, frame, op_ret, local->op_errno, NULL, NULL, - NULL, NULL, NULL, NULL); + if (op_ret == 0) return 0; + + DHT_STACK_UNWIND(create, frame, op_ret, local->op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); + return 0; } int32_t -dht_create_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (!local) { - goto err; - } + if (!local) { + goto err; + } - if (op_ret < 0) { - gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, - "Create lock failed for file: %s", local->loc2.name); + if (op_ret < 0) { + gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "Create lock failed for file: %s", local->loc2.name); - local->op_errno = op_errno; + local->op_errno = op_errno; - goto err; - } + goto err; + } - 
local->refresh_layout_unlock = dht_create_finish; + local->refresh_layout_unlock = dht_create_finish; - local->refresh_layout_done = dht_create_do; + local->refresh_layout_done = dht_create_do; - dht_refresh_layout (frame); + dht_refresh_layout(frame); - return 0; + return 0; err: - dht_create_finish (frame, this, -1, 0); - return 0; + dht_create_finish(frame, this, -1, 0); + return 0; } int32_t -dht_create_lock (call_frame_t *frame, xlator_t *subvol) +dht_create_lock(call_frame_t *frame, xlator_t *subvol) { - dht_local_t *local = NULL; - int count = 1, ret = -1; - dht_lock_t **lk_array = NULL; + dht_local_t *local = NULL; + int count = 1, ret = -1; + dht_lock_t **lk_array = NULL; - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err); + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err); - local = frame->local; + local = frame->local; - lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_pointer); + lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); - if (lk_array == NULL) - goto err; + if (lk_array == NULL) + goto err; - lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc, F_RDLCK, - DHT_LAYOUT_HEAL_DOMAIN, NULL, - IGNORE_ENOENT_ESTALE); + lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK, + DHT_LAYOUT_HEAL_DOMAIN, NULL, + IGNORE_ENOENT_ESTALE); - if (lk_array[0] == NULL) - goto err; + if (lk_array[0] == NULL) + goto err; - local->lock[0].layout.parent_layout.locks = lk_array; - local->lock[0].layout.parent_layout.lk_count = count; + local->lock[0].layout.parent_layout.locks = lk_array; + local->lock[0].layout.parent_layout.lk_count = count; - ret = dht_blocking_inodelk (frame, lk_array, count, - dht_create_lock_cbk); + ret = dht_blocking_inodelk(frame, lk_array, count, dht_create_lock_cbk); - if (ret < 0) { - local->lock[0].layout.parent_layout.locks = NULL; - local->lock[0].layout.parent_layout.lk_count = 0; - goto err; - } + if (ret < 0) { + local->lock[0].layout.parent_layout.locks = NULL; + local->lock[0].layout.parent_layout.lk_count = 0; + goto err; + } - return 0; + return 0; err: - if (lk_array != NULL) { - dht_lock_array_free (lk_array, count); - GF_FREE (lk_array); - } + if (lk_array != NULL) { + dht_lock_array_free(lk_array, count); + GF_FREE(lk_array); + } - return -1; + return -1; } int -dht_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *params) +dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { - int op_errno = -1; - xlator_t *subvol = NULL; - xlator_t *hashed_subvol = NULL; - dht_local_t *local = NULL; - int i = 0; - dht_conf_t *conf = NULL; - int ret = 0; + int op_errno = -1; + xlator_t *subvol = NULL; + xlator_t *hashed_subvol = NULL; + dht_local_t *local = NULL; + int i = 0; + dht_conf_t *conf = NULL; + int ret = 0; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); - conf = this->private; + conf = this->private; - dht_get_du_info (frame, this, loc); + dht_get_du_info(frame, this, loc); - local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } - if 
(dht_filter_loc_subvol_key (this, loc, &local->loc, - &subvol)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, - "creating %s on %s (got create on %s)", - local->loc.path, subvol->name, loc->path); + if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "creating %s on %s (got create on %s)", local->loc.path, + subvol->name, loc->path); - /* Since lookup-optimize is enabled by default, we need - * to create the linkto file if required. - * Note this does not check for decommisioned bricks - * and min-free-disk limits as this is a debugging tool - * and not expected to be used in production. - */ - hashed_subvol = dht_subvol_get_hashed (this, &local->loc); - - if (hashed_subvol && (hashed_subvol != subvol)) { - /* Create the linkto file and then the data file */ - local->params = dict_ref (params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - local->cached_subvol = subvol; - local->hashed_subvol = hashed_subvol; - - dht_linkfile_create (frame, - dht_create_linkfile_create_cbk, - this, subvol, hashed_subvol, - &local->loc); - goto done; + /* Since lookup-optimize is enabled by default, we need + * to create the linkto file if required. + * Note this does not check for decommisioned bricks + * and min-free-disk limits as this is a debugging tool + * and not expected to be used in production. + */ + hashed_subvol = dht_subvol_get_hashed(this, &local->loc); + + if (hashed_subvol && (hashed_subvol != subvol)) { + /* Create the linkto file and then the data file */ + local->params = dict_ref(params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + local->cached_subvol = subvol; + local->hashed_subvol = hashed_subvol; + + dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this, + subvol, hashed_subvol, &local->loc); + goto done; + } + /* We either don't have a hashed subvol or the hashed subvol is + * the same as the one specified. No need to create the linkto + * file as we expect a lookup everywhere if there are problems + * with the parent layout + */ + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, &local->loc, flags, mode, umask, + fd, params); + goto done; + } + + subvol = dht_subvol_get_hashed(this, loc); + if (!subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "no subvolume in layout for path=%s", loc->path); + + op_errno = EIO; + goto err; + } + + /* Post remove-brick, the client layout may not be in sync with + * disk layout because of lack of lookup. Hence,a create call + * may fall on the decommissioned brick. Hence, if the + * hashed_subvol is part of decommissioned bricks list, do a + * lookup on parent dir. If a fix-layout is already done by the + * remove-brick process, the parent directory layout will be in + * sync with that of the disk. If fix-layout is still ending + * on the parent directory, we can let the file get created on + * the decommissioned brick which will be eventually migrated to + * non-decommissioned brick based on the new layout. + */ + + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] && + conf->decommissioned_bricks[i] == subvol) { + gf_msg_debug(this->name, 0, + "hashed subvol:%s is " + "part of decommission brick list for " + "file: %s", + subvol->name, loc->path); + + /* dht_refresh_layout needs directory info in + * local->loc. 
Hence, storing the parent_loc in + * local->loc and storing the create context in + * local->loc2. We will restore this information + * in dht_creation do */ + + ret = loc_copy(&local->loc2, &local->loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "loc_copy failed %s", loc->path); + + goto err; } - /* We either don't have a hashed subvol or the hashed subvol is - * the same as the one specified. No need to create the linkto - * file as we expect a lookup everywhere if there are problems - * with the parent layout - */ - STACK_WIND_COOKIE (frame, dht_create_cbk, subvol, - subvol, subvol->fops->create, &local->loc, - flags, mode, umask, fd, params); - goto done; - } - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "no subvolume in layout for path=%s", - loc->path); + local->params = dict_ref(params); + local->flags = flags; + local->mode = mode; + local->umask = umask; - op_errno = EIO; - goto err; - } + loc_wipe(&local->loc); - /* Post remove-brick, the client layout may not be in sync with - * disk layout because of lack of lookup. Hence,a create call - * may fall on the decommissioned brick. Hence, if the - * hashed_subvol is part of decommissioned bricks list, do a - * lookup on parent dir. If a fix-layout is already done by the - * remove-brick process, the parent directory layout will be in - * sync with that of the disk. If fix-layout is still ending - * on the parent directory, we can let the file get created on - * the decommissioned brick which will be eventually migrated to - * non-decommissioned brick based on the new layout. - */ + ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] && - conf->decommissioned_bricks[i] == subvol) { - - gf_msg_debug (this->name, 0, "hashed subvol:%s is " - "part of decommission brick list for " - "file: %s", subvol->name, loc->path); - - /* dht_refresh_layout needs directory info in - * local->loc. Hence, storing the parent_loc in - * local->loc and storing the create context in - * local->loc2. 
We will restore this information - * in dht_creation do */ - - ret = loc_copy (&local->loc2, &local->loc); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - "loc_copy failed %s", loc->path); - - goto err; - } - - local->params = dict_ref (params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - - loc_wipe (&local->loc); - - ret = dht_build_parent_loc (this, &local->loc, loc, - &op_errno); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_LOC_FAILED, - "parent loc build failed"); - goto err; - } - - ret = dht_create_lock (frame, subvol); - - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INODE_LK_ERROR, - "locking parent failed"); - goto err; - } - - goto done; - } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, + "parent loc build failed"); + goto err; + } + + ret = dht_create_lock(frame, subvol); + + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "locking parent failed"); + goto err; + } + + goto done; } } + } - - dht_create_wind_to_avail_subvol (frame, this, subvol, loc, flags, mode, - umask, fd, params); + dht_create_wind_to_avail_subvol(frame, this, subvol, loc, flags, mode, + umask, fd, params); done: - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); - - return 0; -} - - -int -dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - - local = frame->local; - layout = local->selfheal.layout; - - FRAME_SU_UNDO (frame, dht_local_t); - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); - - if (op_ret == 0) { - dht_layout_set (this, local->inode, layout); - - dht_inode_ctx_time_update (local->inode, this, - &local->stbuf, 1); - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->preparent, 0); - - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } - } - - DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno, - local->inode, &local->stbuf, &local->preparent, - &local->postparent, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } int -dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - int ret = -1; - gf_boolean_t subvol_filled = _gf_false; - gf_boolean_t dir_exists = _gf_false; - xlator_t *prev = NULL; - dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; - local = frame->local; - prev = cookie; - layout = local->layout; + local = frame->local; + layout = local->selfheal.layout; - subvol_filled = dht_is_subvol_filled (this, prev); - - LOCK (&frame->lock); - { - if (subvol_filled && (op_ret != -1)) { - ret = dht_layout_merge (this, layout, prev, - -1, ENOSPC, NULL); - } else { - if (op_ret == -1 && op_errno == EEXIST) { - /* Very likely just a race between mkdir and - self-heal (from lookup of a concurrent mkdir - attempt). - Ignore error for now. layout setting will - anyways fail if this was a different (old) - pre-existing different directory. - */ - op_ret = 0; - dir_exists = _gf_true; - } - ret = dht_layout_merge (this, layout, prev, - op_ret, op_errno, NULL); - } - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_MERGE_FAILED, - "%s: failed to merge layouts for subvol %s", - local->loc.path, prev->name); - - if (op_ret == -1) { - local->op_errno = op_errno; - goto unlock; - } + FRAME_SU_UNDO(frame, dht_local_t); + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); - if (dir_exists) - goto unlock; + if (op_ret == 0) { + dht_layout_set(this, local->inode, layout); - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->preparent, preparent); - dht_iatt_merge (this, &local->postparent, postparent); - } -unlock: - UNLOCK (&frame->lock); + dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->preparent, 0); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - /*Unlock entrylk and inodelk once mkdir is done on all subvols*/ - dht_unlock_namespace (frame, &local->lock[0]); - FRAME_SU_DO (frame, dht_local_t); - dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk, - layout); + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); } + } - return 0; -} + DHT_STACK_UNWIND(mkdir, frame, op_ret, op_errno, local->inode, + &local->stbuf, &local->preparent, &local->postparent, + NULL); -int -dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata); + return 0; +} int -dht_mkdir_helper (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *params) +dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1, ret = -1; - xlator_t *hashed_subvol = NULL; - int32_t *parent_disk_layout = NULL; - dht_layout_t *parent_layout 
= NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (this->private, err); - - gf_uuid_unparse (loc->parent->gfid, pgfid); + dht_local_t *local = NULL; + int this_call_cnt = 0; + int ret = -1; + gf_boolean_t subvol_filled = _gf_false; + gf_boolean_t dir_exists = _gf_false; + xlator_t *prev = NULL; + dht_layout_t *layout = NULL; - conf = this->private; - local = frame->local; + local = frame->local; + prev = cookie; + layout = local->layout; - if (local->op_ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): refreshing parent layout " - "failed.", pgfid, loc->name, - loc->path); + subvol_filled = dht_is_subvol_filled(this, prev); - op_errno = local->op_errno; - goto err; + LOCK(&frame->lock); + { + if (subvol_filled && (op_ret != -1)) { + ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL); + } else { + if (op_ret == -1 && op_errno == EEXIST) { + /* Very likely just a race between mkdir and + self-heal (from lookup of a concurrent mkdir + attempt). + Ignore error for now. layout setting will + anyways fail if this was a different (old) + pre-existing different directory. + */ + op_ret = 0; + dir_exists = _gf_true; + } + ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL); } + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED, + "%s: failed to merge layouts for subvol %s", local->loc.path, + prev->name); - local->op_ret = -1; - - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (hashed_subvol == NULL) { - gf_msg_debug (this->name, 0, - "mkdir (%s/%s) (path: %s): hashed subvol not " - "found", pgfid, loc->name, loc->path); - op_errno = ENOENT; - goto err; + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; } - local->hashed_subvol = hashed_subvol; + if (dir_exists) + goto unlock; - parent_layout = dht_layout_get (this, loc->parent); + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preparent, preparent); + dht_iatt_merge(this, &local->postparent, postparent); + } +unlock: + UNLOCK(&frame->lock); - ret = dht_disk_layout_extract_for_subvol (this, parent_layout, - hashed_subvol, - &parent_disk_layout); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, EIO, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "extracting in-memory layout of parent failed. ", - pgfid, loc->name, loc->path); - goto err; - } + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + /*Unlock entrylk and inodelk once mkdir is done on all subvols*/ + dht_unlock_namespace(frame, &local->lock[0]); + FRAME_SU_DO(frame, dht_local_t); + dht_selfheal_new_directory(frame, dht_mkdir_selfheal_cbk, layout); + } - if (memcmp (local->parent_disk_layout, parent_disk_layout, - sizeof (local->parent_disk_layout)) == 0) { - gf_msg (this->name, GF_LOG_WARNING, EIO, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): loop detected. 
" - "parent layout didn't change even though " - "previous attempt of mkdir failed because of " - "in-memory layout not matching with that on disk.", - pgfid, loc->name, loc->path); - op_errno = EIO; - goto err; - } + return 0; +} - memcpy ((void *)local->parent_disk_layout, (void *)parent_disk_layout, - sizeof (local->parent_disk_layout)); +int +dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata); - dht_layout_unref (this, parent_layout); - parent_layout = NULL; +int +dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *params) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1, ret = -1; + xlator_t *hashed_subvol = NULL; + int32_t *parent_disk_layout = NULL; + dht_layout_t *parent_layout = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; - ret = dict_set_str (params, GF_PREOP_PARENT_KEY, conf->xattr_name); - if (ret < 0) { - local->op_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "setting %s key in params dictionary failed. ", - pgfid, loc->name, loc->path, GF_PREOP_PARENT_KEY); - goto err; - } + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(this->private, err); - ret = dict_set_bin (params, conf->xattr_name, parent_disk_layout, - 4 * 4); - if (ret < 0) { - local->op_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "setting parent-layout in params dictionary failed. " - "mkdir (%s/%s) (path: %s)", pgfid, loc->name, - loc->path); - goto err; - } + gf_uuid_unparse(loc->parent->gfid, pgfid); - parent_disk_layout = NULL; + conf = this->private; + local = frame->local; - STACK_WIND_COOKIE (frame, dht_mkdir_hashed_cbk, hashed_subvol, - hashed_subvol, hashed_subvol->fops->mkdir, - loc, mode, umask, params); + if (local->op_ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): refreshing parent layout " + "failed.", + pgfid, loc->name, loc->path); - return 0; + op_errno = local->op_errno; + goto err; + } + + local->op_ret = -1; + + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (hashed_subvol == NULL) { + gf_msg_debug(this->name, 0, + "mkdir (%s/%s) (path: %s): hashed subvol not " + "found", + pgfid, loc->name, loc->path); + op_errno = ENOENT; + goto err; + } + + local->hashed_subvol = hashed_subvol; + + parent_layout = dht_layout_get(this, loc->parent); + + ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, + &parent_disk_layout); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "extracting in-memory layout of parent failed. ", + pgfid, loc->name, loc->path); + goto err; + } + + if (memcmp(local->parent_disk_layout, parent_disk_layout, + sizeof(local->parent_disk_layout)) == 0) { + gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): loop detected. 
" + "parent layout didn't change even though " + "previous attempt of mkdir failed because of " + "in-memory layout not matching with that on disk.", + pgfid, loc->name, loc->path); + op_errno = EIO; + goto err; + } + + memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout, + sizeof(local->parent_disk_layout)); + + dht_layout_unref(this, parent_layout); + parent_layout = NULL; + + ret = dict_set_str(params, GF_PREOP_PARENT_KEY, conf->xattr_name); + if (ret < 0) { + local->op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "setting %s key in params dictionary failed. ", + pgfid, loc->name, loc->path, GF_PREOP_PARENT_KEY); + goto err; + } + + ret = dict_set_bin(params, conf->xattr_name, parent_disk_layout, 4 * 4); + if (ret < 0) { + local->op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "setting parent-layout in params dictionary failed. " + "mkdir (%s/%s) (path: %s)", + pgfid, loc->name, loc->path); + goto err; + } + + parent_disk_layout = NULL; + + STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, hashed_subvol, hashed_subvol, + hashed_subvol->fops->mkdir, loc, mode, umask, params); + + return 0; err: - dht_unlock_namespace (frame, &local->lock[0]); + dht_unlock_namespace(frame, &local->lock[0]); - op_errno = local ? local->op_errno : op_errno; - DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); + op_errno = local ? local->op_errno : op_errno; + DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - if (parent_disk_layout != NULL) - GF_FREE (parent_disk_layout); + if (parent_disk_layout != NULL) + GF_FREE(parent_disk_layout); - if (parent_layout != NULL) - dht_layout_unref (this, parent_layout); + if (parent_layout != NULL) + dht_layout_unref(this, parent_layout); - return 0; + return 0; } int -dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - int ret = -1; - xlator_t *prev = NULL; - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; - int i = 0; - xlator_t *hashed_subvol = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - gf_boolean_t parent_layout_changed = _gf_false; - call_stub_t *stub = NULL; - - VALIDATE_OR_GOTO (this->private, err); + dht_local_t *local = NULL; + int ret = -1; + xlator_t *prev = NULL; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *hashed_subvol = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + gf_boolean_t parent_layout_changed = _gf_false; + call_stub_t *stub = NULL; - local = frame->local; - prev = cookie; - layout = local->layout; - conf = this->private; - hashed_subvol = local->hashed_subvol; + VALIDATE_OR_GOTO(this->private, err); - gf_uuid_unparse (local->loc.parent->gfid, pgfid); + local = frame->local; + prev = cookie; + layout = local->layout; + conf = this->private; + hashed_subvol = local->hashed_subvol; - if (gf_uuid_is_null (local->loc.gfid) && !op_ret) - gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid); + gf_uuid_unparse(local->loc.parent->gfid, pgfid); - if (op_ret == -1) { - local->op_errno = op_errno; + if 
(gf_uuid_is_null(local->loc.gfid) && !op_ret) + gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid); - parent_layout_changed = (xdata && dict_get (xdata, GF_PREOP_CHECK_FAILED)) - ? 1 : 0; - if (parent_layout_changed) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): parent layout " - "changed. Attempting a refresh and then a " - "retry", pgfid, local->loc.name, - local->loc.path); - - stub = fop_mkdir_stub (frame, dht_mkdir_helper, - &local->loc, local->mode, - local->umask, local->params); - if (stub == NULL) { - goto err; - } - - dht_handle_parent_layout_change (this, stub); - stub = NULL; - - return 0; - } + if (op_ret == -1) { + local->op_errno = op_errno; + parent_layout_changed = (xdata && + dict_get(xdata, GF_PREOP_CHECK_FAILED)) + ? 1 + : 0; + if (parent_layout_changed) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): parent layout " + "changed. Attempting a refresh and then a " + "retry", + pgfid, local->loc.name, local->loc.path); + + stub = fop_mkdir_stub(frame, dht_mkdir_helper, &local->loc, + local->mode, local->umask, local->params); + if (stub == NULL) { goto err; - } - - dict_del (local->params, GF_PREOP_PARENT_KEY); - dict_del (local->params, conf->xattr_name); + } - if (dht_is_subvol_filled (this, hashed_subvol)) - ret = dht_layout_merge (this, layout, prev, - -1, ENOSPC, NULL); - else - ret = dht_layout_merge (this, layout, prev, - op_ret, op_errno, NULL); + dht_handle_parent_layout_change(this, stub); + stub = NULL; + + return 0; + } + + goto err; + } + + dict_del(local->params, GF_PREOP_PARENT_KEY); + dict_del(local->params, conf->xattr_name); + + if (dht_is_subvol_filled(this, hashed_subvol)) + ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL); + else + ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL); + + /* TODO: we may have to return from the function + if layout merge fails. 
For now, lets just log an error */ + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED, + "%s: failed to merge layouts for subvol %s", local->loc.path, + prev->name); + + local->op_ret = 0; + + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preparent, preparent); + dht_iatt_merge(this, &local->postparent, postparent); + + local->call_cnt = conf->subvolume_cnt - 1; + /* Delete internal mds xattr from params dict to avoid store + internal mds xattr on other subvols + */ + dict_del(local->params, conf->mds_xattr_key); + + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid); + + /* Set hashed subvol as a mds subvol on inode ctx */ + /*if (!local->inode) + local->inode = inode_ref (inode); + */ + ret = dht_inode_ctx_mdsvol_set(local->inode, this, hashed_subvol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set hashed subvol for %s on inode vol is %s", + local->loc.path, hashed_subvol->name); + } + + if (local->call_cnt == 0) { + /*Unlock namespace lock once mkdir is done on all subvols*/ + dht_unlock_namespace(frame, &local->lock[0]); + FRAME_SU_DO(frame, dht_local_t); + dht_selfheal_directory(frame, dht_mkdir_selfheal_cbk, &local->loc, + layout); + return 0; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == hashed_subvol) + continue; + STACK_WIND_COOKIE(frame, dht_mkdir_cbk, conf->subvolumes[i], + conf->subvolumes[i], conf->subvolumes[i]->fops->mkdir, + &local->loc, local->mode, local->umask, + local->params); + } + + return 0; +err: + if (local->op_ret != 0) { + dht_unlock_namespace(frame, &local->lock[0]); + } - /* TODO: we may have to return from the function - if layout merge fails. 
For now, lets just log an error */ - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_MERGE_FAILED, - "%s: failed to merge layouts for subvol %s", - local->loc.path, prev->name); + DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - local->op_ret = 0; + return 0; +} - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->preparent, preparent); - dht_iatt_merge (this, &local->postparent, postparent); +int +dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, mode_t umask, + dict_t *params) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = 0; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int ret = -1; + int32_t zero[1] = {0}; - local->call_cnt = conf->subvolume_cnt - 1; - /* Delete internal mds xattr from params dict to avoid store - internal mds xattr on other subvols - */ - dict_del (local->params, conf->mds_xattr_key); + local = frame->local; + conf = this->private; - if (gf_uuid_is_null (local->loc.gfid)) - gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid); + gf_uuid_unparse(loc->parent->gfid, pgfid); - /* Set hashed subvol as a mds subvol on inode ctx */ - /*if (!local->inode) - local->inode = inode_ref (inode); - */ - ret = dht_inode_ctx_mdsvol_set (local->inode, this, hashed_subvol); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set hashed subvol for %s on inode vol is %s", - local->loc.path, hashed_subvol->name); - } + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "Acquiring lock on parent to guard against " + "layout-change failed.", + pgfid, loc->name, loc->path); + goto err; + } - if (local->call_cnt == 0) { - /*Unlock namespace lock once mkdir is done on all subvols*/ - dht_unlock_namespace (frame, &local->lock[0]); - FRAME_SU_DO (frame, dht_local_t); - dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, - &local->loc, layout); - return 0; - } + local->op_ret = -1; + /* Add internal MDS xattr on disk for hashed subvol + */ + ret = dht_dict_set_array(params, conf->mds_xattr_key, zero, 1); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + conf->mds_xattr_key, loc->path); + } - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == hashed_subvol) - continue; - STACK_WIND_COOKIE (frame, dht_mkdir_cbk, conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->mkdir, - &local->loc, local->mode, local->umask, - local->params); - } + STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, local->hashed_subvol, + local->hashed_subvol, local->hashed_subvol->fops->mkdir, + loc, mode, umask, params); - return 0; + return 0; err: - if (local->op_ret != 0) { - dht_unlock_namespace (frame, &local->lock[0]); - } + DHT_STACK_UNWIND(mkdir, frame, -1, local->op_errno, NULL, NULL, NULL, NULL, + NULL); + + return 0; +} + +int +dht_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *params) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = EINVAL, ret = -1; + xlator_t *hashed_subvol = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + call_stub_t *stub = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(this->private, err); + + 
gf_uuid_unparse(loc->parent->gfid, pgfid); + + conf = this->private; + + if (!params || !dict_get(params, "gfid-req")) { + op_errno = EPERM; + gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_GFID_NULL, + "mkdir: %s is received " + "without gfid-req %p", + loc->path, params); + goto err; + } + + dht_get_du_info(frame, this, loc); + + local = dht_local_init(frame, loc, NULL, GF_FOP_MKDIR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (hashed_subvol == NULL) { + gf_msg_debug(this->name, 0, "hashed subvol not found for %s", + loc->path); + local->op_errno = EIO; + goto err; + } + + local->hashed_subvol = hashed_subvol; + local->mode = mode; + local->umask = umask; + if (params) + local->params = dict_ref(params); + + local->inode = inode_ref(loc->inode); + + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } + + /* set the newly created directory hash to the commit hash + * if the configuration option is set. If configuration option + * is not set, the older clients may still be connecting to the + * volume and hence we need to preserve the 1 in disk[0] part of the + * layout xattr */ + if (conf->lookup_optimize) + local->layout->commit_hash = conf->vol_commit_hash; + else + local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID; + + stub = fop_mkdir_stub(frame, dht_mkdir_guard_parent_layout_cbk, loc, mode, + umask, params); + if (stub == NULL) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "creating stub failed.", + pgfid, loc->name, loc->path); + local->op_errno = ENOMEM; + goto err; + } + + ret = dht_guard_parent_layout_and_namespace(this, stub); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s) cannot wind lock request to " + "guard parent layout", + pgfid, loc->name, loc->path); + goto err; + } + + return 0; - DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); +err: + op_errno = local ? 
local->op_errno : op_errno; + DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } int -dht_mkdir_guard_parent_layout_cbk (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, - dict_t *params) +dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = 0; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - int ret = -1; - int32_t zero[1] = {0}; - - local = frame->local; - conf = this->private; - - gf_uuid_unparse (loc->parent->gfid, pgfid); - - if (local->op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "Acquiring lock on parent to guard against " - "layout-change failed.", pgfid, loc->name, loc->path); - goto err; - } + dht_local_t *local = NULL; + dht_local_t *heal_local = NULL; + call_frame_t *main_frame = NULL; - local->op_ret = -1; - /* Add internal MDS xattr on disk for hashed subvol - */ - ret = dht_dict_set_array (params, conf->mds_xattr_key, zero, 1); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", conf->mds_xattr_key, loc->path); - } + heal_local = heal_frame->local; + main_frame = heal_local->main_frame; + local = main_frame->local; - STACK_WIND_COOKIE (frame, dht_mkdir_hashed_cbk, local->hashed_subvol, - local->hashed_subvol, - local->hashed_subvol->fops->mkdir, - loc, mode, umask, params); + DHT_STACK_DESTROY(heal_frame); + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); - return 0; -err: - DHT_STACK_UNWIND (mkdir, frame, -1, local->op_errno, NULL, NULL, NULL, - NULL, NULL); + DHT_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); - return 0; + return 0; } int -dht_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *params) +dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = EINVAL, ret = -1; - xlator_t *hashed_subvol = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - call_stub_t *stub = NULL; + dht_local_t *local = NULL; + dht_local_t *heal_local = NULL; + call_frame_t *heal_frame = NULL; + dht_conf_t *conf = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (this->private, err); + local = frame->local; + prev = cookie; + conf = this->private; - gf_uuid_unparse (loc->parent->gfid, pgfid); + gf_uuid_unparse(local->loc.gfid, gfid); - conf = this->private; - - if (!params || !dict_get (params, "gfid-req")) { - op_errno = EPERM; - gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_GFID_NULL, "mkdir: %s is received " - "without gfid-req %p", loc->path, params); - goto err; - } - - dht_get_du_info (frame, this, loc); - - local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR); - if (!local) { - op_errno = ENOMEM; - goto err; - } + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = -1; + if 
(conf->subvolume_cnt != 1) { + if (op_errno != ENOENT && op_errno != EACCES && + op_errno != ESTALE) { + local->need_selfheal = 1; + } + } - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (hashed_subvol == NULL) { - gf_msg_debug (this->name, 0, - "hashed subvol not found for %s", - loc->path); - local->op_errno = EIO; - goto err; + gf_msg_debug(this->name, op_errno, + "rmdir on %s for %s failed " + "(gfid = %s)", + prev->name, local->loc.path, gfid); + goto unlock; } + dht_iatt_merge(this, &local->preparent, preparent); + dht_iatt_merge(this, &local->postparent, postparent); + } +unlock: + UNLOCK(&frame->lock); - local->hashed_subvol = hashed_subvol; - local->mode = mode; - local->umask = umask; - if (params) - local->params = dict_ref (params); + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + if (local->need_selfheal) { + dht_rmdir_unlock(frame, this); + local->layout = dht_layout_get(this, local->loc.inode); - local->inode = inode_ref (loc->inode); + /* TODO: neater interface needed below */ + local->stbuf.ia_type = local->loc.inode->ia_type; - local->layout = dht_layout_new (this, conf->subvolume_cnt); - if (!local->layout) { - op_errno = ENOMEM; - goto err; - } + gf_uuid_copy(local->gfid, local->loc.inode->gfid); - /* set the newly created directory hash to the commit hash - * if the configuration option is set. If configuration option - * is not set, the older clients may still be connecting to the - * volume and hence we need to preserve the 1 in disk[0] part of the - * layout xattr */ - if (conf->lookup_optimize) - local->layout->commit_hash = conf->vol_commit_hash; - else - local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID; + /* Use a different frame or else the rmdir op_ret is + * overwritten by that of the selfheal */ + heal_frame = copy_frame(frame); - stub = fop_mkdir_stub (frame, dht_mkdir_guard_parent_layout_cbk, loc, - mode, umask, params); - if (stub == NULL) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "creating stub failed.", pgfid, loc->name, loc->path); - local->op_errno = ENOMEM; + if (heal_frame == NULL) { goto err; - } + } - ret = dht_guard_parent_layout_and_namespace (this, stub); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s) cannot wind lock request to " - "guard parent layout", pgfid, loc->name, loc->path); + heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0); + if (!heal_local) { + DHT_STACK_DESTROY(heal_frame); goto err; - } - - return 0; - -err: - op_errno = local ? 
local->op_errno : op_errno; - DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); + } - return 0; -} + heal_local->inode = inode_ref(local->loc.inode); + heal_local->main_frame = frame; + gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid); + dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk, + &heal_local->loc, heal_local->layout); + return 0; + } else { + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->preparent, 0); -int -dht_rmdir_selfheal_cbk (call_frame_t *heal_frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - dht_local_t *heal_local = NULL; - call_frame_t *main_frame = NULL; + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); + } - heal_local = heal_frame->local; - main_frame = heal_local->main_frame; - local = main_frame->local; + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); - DHT_STACK_DESTROY (heal_frame); - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); + dht_rmdir_unlock(frame, this); + DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); + } + } - DHT_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, NULL); + return 0; - return 0; +err: + DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, NULL, NULL, + NULL); + return 0; } - int -dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - dht_local_t *heal_local = NULL; - call_frame_t *heal_frame = NULL; - dht_conf_t *conf = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - char gfid[GF_UUID_BUF_SIZE] ={0}; + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + int done = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + dht_local_t *heal_local = NULL; + call_frame_t *heal_frame = NULL; + int ret = -1; - local = frame->local; - prev = cookie; - conf = this->private; + local = frame->local; + prev = cookie; - gf_uuid_unparse(local->loc.gfid, gfid); + LOCK(&frame->lock); + { + if (op_ret == -1) { + if ((op_errno != ENOENT) && (op_errno != ESTALE)) { + local->op_errno = op_errno; + local->op_ret = -1; - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - local->op_ret = -1; - if (conf->subvolume_cnt != 1) { - if (op_errno != ENOENT && op_errno != EACCES - && op_errno != ESTALE) { - local->need_selfheal = 1; - } - } - - gf_msg_debug (this->name, op_errno, - "rmdir on %s for %s failed " - "(gfid = %s)", - prev->name, local->loc.path, - gfid); - goto unlock; - } + if (op_errno != EACCES) + local->need_selfheal = 1; + } - dht_iatt_merge (this, &local->preparent, preparent); - dht_iatt_merge (this, &local->postparent, postparent); + gf_uuid_unparse(local->loc.gfid, gfid); + gf_msg_debug(this->name, op_errno, + "rmdir on %s for %s failed." 
+ "(gfid = %s)", + prev->name, local->loc.path, gfid); + goto unlock; } + + /* Track if rmdir succeeded on at least one subvol*/ + local->fop_succeeded = 1; + dht_iatt_merge(this, &local->preparent, preparent); + dht_iatt_merge(this, &local->postparent, postparent); + } unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - if (local->need_selfheal) { - dht_rmdir_unlock (frame, this); - local->layout = - dht_layout_get (this, local->loc.inode); - - /* TODO: neater interface needed below */ - local->stbuf.ia_type = local->loc.inode->ia_type; - - gf_uuid_copy (local->gfid, local->loc.inode->gfid); - - /* Use a different frame or else the rmdir op_ret is - * overwritten by that of the selfheal */ - - heal_frame = copy_frame (frame); - - if (heal_frame == NULL) { - goto err; - } - - heal_local = dht_local_init (heal_frame, - &local->loc, - NULL, 0); - if (!heal_local) { - DHT_STACK_DESTROY (heal_frame); - goto err; - } - - heal_local->inode = inode_ref (local->loc.inode); - heal_local->main_frame = frame; - gf_uuid_copy (heal_local->gfid, local->loc.inode->gfid); - - dht_selfheal_restore (heal_frame, - dht_rmdir_selfheal_cbk, - &heal_local->loc, - heal_local->layout); - return 0; - } else { - - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, - this, - &local->preparent, - 0); - - dht_inode_ctx_time_update (local->loc.parent, - this, - &local->postparent, - 1); - } - - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); - - dht_rmdir_unlock (frame, this); - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, - local->op_errno, &local->preparent, - &local->postparent, NULL); - } - } + UNLOCK(&frame->lock); - return 0; + this_call_cnt = dht_frame_return(frame); -err: - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; + /* if local->hashed_subvol, we are yet to wind to hashed_subvol. 
*/ + if (local->hashed_subvol && (this_call_cnt == 1)) { + done = 1; + } else if (!local->hashed_subvol && !this_call_cnt) { + done = 1; + } -} + if (done) { + if (local->need_selfheal && local->fop_succeeded) { + dht_rmdir_unlock(frame, this); + local->layout = dht_layout_get(this, local->loc.inode); + /* TODO: neater interface needed below */ + local->stbuf.ia_type = local->loc.inode->ia_type; -int -dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - int done = 0; - char gfid[GF_UUID_BUF_SIZE] ={0}; - dht_local_t *heal_local = NULL; - call_frame_t *heal_frame = NULL; - int ret = -1; + gf_uuid_copy(local->gfid, local->loc.inode->gfid); + heal_frame = copy_frame(frame); + if (heal_frame == NULL) { + goto err; + } - local = frame->local; - prev = cookie; + heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0); + if (!heal_local) { + DHT_STACK_DESTROY(heal_frame); + goto err; + } + heal_local->inode = inode_ref(local->loc.inode); + heal_local->main_frame = frame; + gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid); + ret = dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk, + &heal_local->loc, heal_local->layout); + if (ret) { + DHT_STACK_DESTROY(heal_frame); + goto err; + } - LOCK (&frame->lock); - { - if (op_ret == -1) { - if ((op_errno != ENOENT) && (op_errno != ESTALE)) { - local->op_errno = op_errno; - local->op_ret = -1; + } else if (this_call_cnt) { + /* If non-hashed subvol's have responded, proceed */ + if (local->op_ret == 0) { + /* Delete the dir from the hashed subvol if: + * The fop succeeded on at least one subvol + * and did not fail on any + * or + * The fop failed with ENOENT/ESTALE on + * all subvols */ + + STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk, + local->hashed_subvol, local->hashed_subvol, + local->hashed_subvol->fops->rmdir, + &local->loc, local->flags, NULL); + } else { + /* hashed-subvol was non-NULL and rmdir failed on + * all non hashed-subvols. Unwind rmdir with + * local->op_ret and local->op_errno. */ + dht_rmdir_unlock(frame, this); + DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); - if (op_errno != EACCES) - local->need_selfheal = 1; - } + return 0; + } + } else if (!this_call_cnt) { + /* All subvol's have responded, proceed */ - gf_uuid_unparse(local->loc.gfid, gfid); + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->preparent, 0); - gf_msg_debug (this->name, op_errno, - "rmdir on %s for %s failed." - "(gfid = %s)", - prev->name, local->loc.path, - gfid); - goto unlock; - } + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); + } - /* Track if rmdir succeeded on at least one subvol*/ - local->fop_succeeded = 1; - dht_iatt_merge (this, &local->preparent, preparent); - dht_iatt_merge (this, &local->postparent, postparent); - } -unlock: - UNLOCK (&frame->lock); - - - this_call_cnt = dht_frame_return (frame); - - /* if local->hashed_subvol, we are yet to wind to hashed_subvol. 
*/ - if (local->hashed_subvol && (this_call_cnt == 1)) { - done = 1; - } else if (!local->hashed_subvol && !this_call_cnt) { - done = 1; - } - - - if (done) { - if (local->need_selfheal && local->fop_succeeded) { - dht_rmdir_unlock (frame, this); - local->layout = - dht_layout_get (this, local->loc.inode); - - /* TODO: neater interface needed below */ - local->stbuf.ia_type = local->loc.inode->ia_type; - - gf_uuid_copy (local->gfid, local->loc.inode->gfid); - heal_frame = copy_frame (frame); - if (heal_frame == NULL) { - goto err; - } - - heal_local = dht_local_init (heal_frame, &local->loc, - NULL, 0); - if (!heal_local) { - DHT_STACK_DESTROY (heal_frame); - goto err; - } - - heal_local->inode = inode_ref (local->loc.inode); - heal_local->main_frame = frame; - gf_uuid_copy (heal_local->gfid, local->loc.inode->gfid); - ret = dht_selfheal_restore (heal_frame, - dht_rmdir_selfheal_cbk, - &heal_local->loc, - heal_local->layout); - if (ret) { - DHT_STACK_DESTROY (heal_frame); - goto err; - } - - } else if (this_call_cnt) { - /* If non-hashed subvol's have responded, proceed */ - if (local->op_ret == 0) { - /* Delete the dir from the hashed subvol if: - * The fop succeeded on at least one subvol - * and did not fail on any - * or - * The fop failed with ENOENT/ESTALE on - * all subvols */ - - STACK_WIND_COOKIE (frame, dht_rmdir_hashed_subvol_cbk, - local->hashed_subvol, - local->hashed_subvol, - local->hashed_subvol->fops->rmdir, - &local->loc, local->flags, NULL); - } else { - /* hashed-subvol was non-NULL and rmdir failed on - * all non hashed-subvols. Unwind rmdir with - * local->op_ret and local->op_errno. */ - dht_rmdir_unlock (frame, this); - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, - local->op_errno, &local->preparent, - &local->postparent, NULL); - - return 0; - - } - } else if (!this_call_cnt) { - /* All subvol's have responded, proceed */ - - if (local->loc.parent) { - - dht_inode_ctx_time_update (local->loc.parent, - this, - &local->preparent, - 0); - - dht_inode_ctx_time_update (local->loc.parent, - this, - &local->postparent, - 1); - - } - - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); - - dht_rmdir_unlock (frame, this); - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, - local->op_errno, &local->preparent, - &local->postparent, NULL); - } + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); + + dht_rmdir_unlock(frame, this); + DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); } + } - return 0; + return 0; err: - DHT_STACK_UNWIND (rmdir, frame, -1, local->op_errno, NULL, NULL, NULL); - return 0; - + DHT_STACK_UNWIND(rmdir, frame, -1, local->op_errno, NULL, NULL, NULL); + return 0; } - int -dht_rmdir_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_DESTROY (frame); - return 0; + DHT_STACK_DESTROY(frame); + return 0; } - int -dht_rmdir_unlock (call_frame_t *frame, xlator_t *this) +dht_rmdir_unlock(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL, *lock_local = NULL; - call_frame_t *lock_frame = NULL; - int lock_count = 0; + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + int lock_count = 0; - local = frame->local; + local = frame->local; - /* Unlock entrylk */ - 
dht_unlock_entrylk_wrapper (frame, &local->lock[0].ns.directory_ns); + /* Unlock entrylk */ + dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns); - /* Unlock inodelk */ - lock_count = dht_lock_count (local->lock[0].ns.parent_layout.locks, - local->lock[0].ns.parent_layout.lk_count); + /* Unlock inodelk */ + lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks, + local->lock[0].ns.parent_layout.lk_count); - if (lock_count == 0) - goto done; + if (lock_count == 0) + goto done; - lock_frame = copy_frame (frame); - if (lock_frame == NULL) - goto done; + lock_frame = copy_frame(frame); + if (lock_frame == NULL) + goto done; - lock_local = dht_local_init (lock_frame, &local->loc, NULL, - lock_frame->root->op); - if (lock_local == NULL) - goto done; + lock_local = dht_local_init(lock_frame, &local->loc, NULL, + lock_frame->root->op); + if (lock_local == NULL) + goto done; - lock_local->lock[0].ns.parent_layout.locks = local->lock[0].ns.parent_layout.locks; - lock_local->lock[0].ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count; + lock_local->lock[0].ns.parent_layout.locks = local->lock[0] + .ns.parent_layout.locks; + lock_local->lock[0] + .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count; - local->lock[0].ns.parent_layout.locks = NULL; - local->lock[0].ns.parent_layout.lk_count = 0; - dht_unlock_inodelk (lock_frame, - lock_local->lock[0].ns.parent_layout.locks, - lock_local->lock[0].ns.parent_layout.lk_count, - dht_rmdir_unlock_cbk); - lock_frame = NULL; + local->lock[0].ns.parent_layout.locks = NULL; + local->lock[0].ns.parent_layout.lk_count = 0; + dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks, + lock_local->lock[0].ns.parent_layout.lk_count, + dht_rmdir_unlock_cbk); + lock_frame = NULL; done: - if (lock_frame != NULL) { - DHT_STACK_DESTROY (lock_frame); - } + if (lock_frame != NULL) { + DHT_STACK_DESTROY(lock_frame); + } - return 0; + return 0; } - int -dht_rmdir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int i = 0; - xlator_t *hashed_subvol; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *hashed_subvol; - VALIDATE_OR_GOTO (this->private, err); + VALIDATE_OR_GOTO(this->private, err); - conf = this->private; - local = frame->local; + conf = this->private; + local = frame->local; - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_INODE_LK_ERROR, - "acquiring entrylk after inodelk failed rmdir for %s)", - local->loc.path); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "acquiring entrylk after inodelk failed rmdir for %s)", + local->loc.path); - local->op_ret = -1; - local->op_errno = op_errno; - goto err; - } + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } - hashed_subvol = local->hashed_subvol; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (hashed_subvol && - (hashed_subvol == conf->subvolumes[i])) - continue; + hashed_subvol = local->hashed_subvol; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (hashed_subvol && (hashed_subvol == conf->subvolumes[i])) + continue; - STACK_WIND_COOKIE (frame, dht_rmdir_cbk, conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->rmdir, - &local->loc, local->flags, NULL); - 
} + STACK_WIND_COOKIE(frame, dht_rmdir_cbk, conf->subvolumes[i], + conf->subvolumes[i], conf->subvolumes[i]->fops->rmdir, + &local->loc, local->flags, NULL); + } - return 0; + return 0; err: - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, NULL); + DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); - return 0; + return 0; } - int -dht_rmdir_do (call_frame_t *frame, xlator_t *this) +dht_rmdir_do(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - xlator_t *hashed_subvol = NULL; - char gfid[GF_UUID_BUF_SIZE] ={0}; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + xlator_t *hashed_subvol = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - VALIDATE_OR_GOTO (this->private, err); + VALIDATE_OR_GOTO(this->private, err); - conf = this->private; - local = frame->local; + conf = this->private; + local = frame->local; - if (local->op_ret == -1) - goto err; + if (local->op_ret == -1) + goto err; - local->call_cnt = conf->subvolume_cnt; + local->call_cnt = conf->subvolume_cnt; - /* first remove from non-hashed_subvol */ - hashed_subvol = dht_subvol_get_hashed (this, &local->loc); + /* first remove from non-hashed_subvol */ + hashed_subvol = dht_subvol_get_hashed(this, &local->loc); - if (!hashed_subvol) { - gf_uuid_unparse(local->loc.gfid, gfid); + if (!hashed_subvol) { + gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get hashed subvol for %s (gfid = %s)", - local->loc.path, gfid); - } else { - local->hashed_subvol = hashed_subvol; - } + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get hashed subvol for %s (gfid = %s)", + local->loc.path, gfid); + } else { + local->hashed_subvol = hashed_subvol; + } - /* When DHT has only 1 child */ - if (conf->subvolume_cnt == 1) { - STACK_WIND_COOKIE (frame, dht_rmdir_hashed_subvol_cbk, - conf->subvolumes[0], conf->subvolumes[0], - conf->subvolumes[0]->fops->rmdir, - &local->loc, local->flags, NULL); - return 0; - } + /* When DHT has only 1 child */ + if (conf->subvolume_cnt == 1) { + STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk, + conf->subvolumes[0], conf->subvolumes[0], + conf->subvolumes[0]->fops->rmdir, &local->loc, + local->flags, NULL); + return 0; + } - local->current = &local->lock[0]; - ret = dht_protect_namespace (frame, &local->loc, local->hashed_subvol, - &local->current->ns, dht_rmdir_lock_cbk); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = errno ? errno : EINVAL; - goto err; - } + local->current = &local->lock[0]; + ret = dht_protect_namespace(frame, &local->loc, local->hashed_subvol, + &local->current->ns, dht_rmdir_lock_cbk); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = errno ? 
errno : EINVAL; + goto err; + } - return 0; + return 0; err: - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); - DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, NULL); - return 0; + DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); + return 0; } - int -dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - xlator_t *src = NULL; - call_frame_t *readdirp_frame = NULL; - dht_local_t *readdirp_local = NULL; - int this_call_cnt = 0; - char gfid[GF_UUID_BUF_SIZE] ={0}; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *src = NULL; + call_frame_t *readdirp_frame = NULL; + dht_local_t *readdirp_local = NULL; + int this_call_cnt = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + local = frame->local; + prev = cookie; + src = prev; - local = frame->local; - prev = cookie; - src = prev; + readdirp_frame = local->main_frame; + readdirp_local = readdirp_frame->local; + gf_uuid_unparse(local->loc.gfid, gfid); - readdirp_frame = local->main_frame; - readdirp_local = readdirp_frame->local; - - gf_uuid_unparse(local->loc.gfid, gfid); - - if (op_ret == 0) { - gf_msg_trace (this->name, 0, - "Unlinked linkfile %s on %s, gfid = %s", - local->loc.path, src->name, gfid); - } else { - if (op_errno != ENOENT) { - readdirp_local->op_ret = -1; - readdirp_local->op_errno = op_errno; - } - gf_msg_debug (this->name, op_errno, - "Unlink of %s on %s failed. (gfid = %s)", - local->loc.path, src->name, gfid); + if (op_ret == 0) { + gf_msg_trace(this->name, 0, "Unlinked linkfile %s on %s, gfid = %s", + local->loc.path, src->name, gfid); + } else { + if (op_errno != ENOENT) { + readdirp_local->op_ret = -1; + readdirp_local->op_errno = op_errno; } + gf_msg_debug(this->name, op_errno, + "Unlink of %s on %s failed. 
(gfid = %s)", local->loc.path, + src->name, gfid); + } - this_call_cnt = dht_frame_return (readdirp_frame); + this_call_cnt = dht_frame_return(readdirp_frame); - if (is_last_call (this_call_cnt)) - dht_rmdir_readdirp_do (readdirp_frame, this); + if (is_last_call(this_call_cnt)) + dht_rmdir_readdirp_do(readdirp_frame, this); - DHT_STACK_DESTROY (frame); - return 0; + DHT_STACK_DESTROY(frame); + return 0; } - int -dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *xattr, struct iatt *parent) +dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, struct iatt *parent) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - xlator_t *src = NULL; - call_frame_t *readdirp_frame = NULL; - dht_local_t *readdirp_local = NULL; - int this_call_cnt = 0; - dht_conf_t *conf = this->private; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - prev = cookie; - src = prev; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *src = NULL; + call_frame_t *readdirp_frame = NULL; + dht_local_t *readdirp_local = NULL; + int this_call_cnt = 0; + dht_conf_t *conf = this->private; + char gfid[GF_UUID_BUF_SIZE] = {0}; - gf_msg_debug (this->name, 0, "dht_rmdir_lookup_cbk %s", - local->loc.path); + local = frame->local; + prev = cookie; + src = prev; - readdirp_frame = local->main_frame; - readdirp_local = readdirp_frame->local; + gf_msg_debug(this->name, 0, "dht_rmdir_lookup_cbk %s", local->loc.path); - if (op_ret != 0) { + readdirp_frame = local->main_frame; + readdirp_local = readdirp_frame->local; - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_FILE_LOOKUP_FAILED, - "lookup failed for %s on %s (type=0%o)", - local->loc.path, src->name, stbuf->ia_type); - goto err; - } + if (op_ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_FILE_LOOKUP_FAILED, + "lookup failed for %s on %s (type=0%o)", local->loc.path, + src->name, stbuf->ia_type); + goto err; + } - if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { - readdirp_local->op_ret = -1; - readdirp_local->op_errno = ENOTEMPTY; + if (!check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) { + readdirp_local->op_ret = -1; + readdirp_local->op_errno = ENOTEMPTY; - gf_uuid_unparse(local->loc.gfid, gfid); + gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_NOT_LINK_FILE_ERROR, - "%s on %s is not a linkfile (type=0%o, gfid = %s)", - local->loc.path, src->name, stbuf->ia_type, gfid); - goto err; - } + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR, + "%s on %s is not a linkfile (type=0%o, gfid = %s)", + local->loc.path, src->name, stbuf->ia_type, gfid); + goto err; + } - STACK_WIND_COOKIE (frame, dht_rmdir_linkfile_unlink_cbk, src, - src, src->fops->unlink, &local->loc, 0, NULL); - return 0; + STACK_WIND_COOKIE(frame, dht_rmdir_linkfile_unlink_cbk, src, src, + src->fops->unlink, &local->loc, 0, NULL); + return 0; err: - this_call_cnt = dht_frame_return (readdirp_frame); - if (is_last_call (this_call_cnt)) { - dht_rmdir_readdirp_do (readdirp_frame, this); - } + this_call_cnt = dht_frame_return(readdirp_frame); + if (is_last_call(this_call_cnt)) { + dht_rmdir_readdirp_do(readdirp_frame, this); + } - DHT_STACK_DESTROY (frame); - return 0; + DHT_STACK_DESTROY(frame); + return 0; } - int -dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *xattr, - struct iatt *parent) +dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *parent) { - dht_local_t *local = NULL; - xlator_t *src = NULL; - call_frame_t *readdirp_frame = NULL; - dht_local_t *readdirp_local = NULL; - int this_call_cnt = 0; - dht_conf_t *conf = this->private; - dict_t *xattrs = NULL; - int ret = 0; + dht_local_t *local = NULL; + xlator_t *src = NULL; + call_frame_t *readdirp_frame = NULL; + dht_local_t *readdirp_local = NULL; + int this_call_cnt = 0; + dht_conf_t *conf = this->private; + dict_t *xattrs = NULL; + int ret = 0; - local = frame->local; - src = local->hashed_subvol; + local = frame->local; + src = local->hashed_subvol; + /* main_frame here is the readdirp_frame */ - /* main_frame here is the readdirp_frame */ + readdirp_frame = local->main_frame; + readdirp_local = readdirp_frame->local; - readdirp_frame = local->main_frame; - readdirp_local = readdirp_frame->local; + gf_msg_debug(this->name, 0, "returning for %s ", local->loc.path); - gf_msg_debug (this->name, 0, "returning for %s ", - local->loc.path); + if (op_ret == 0) { + readdirp_local->op_ret = -1; + readdirp_local->op_errno = ENOTEMPTY; - if (op_ret == 0) { - readdirp_local->op_ret = -1; - readdirp_local->op_errno = ENOTEMPTY; + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_ERROR, + "%s found on cached subvol %s", local->loc.path, src->name); + goto err; + } else if (op_errno != ENOENT) { + readdirp_local->op_ret = -1; + readdirp_local->op_errno = op_errno; - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_SUBVOL_ERROR, - "%s found on cached subvol %s", - local->loc.path, src->name); - goto err; - } else if (op_errno != ENOENT) { - readdirp_local->op_ret = -1; - readdirp_local->op_errno = op_errno; - - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_SUBVOL_ERROR, - "%s not found on cached subvol %s", - local->loc.path, src->name); - goto err; - } + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_SUBVOL_ERROR, + "%s not found on cached subvol %s", local->loc.path, src->name); + goto err; + } - xattrs = dict_new (); - if (!xattrs) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "dict_new failed"); - goto err; - } + xattrs = dict_new(); + if (!xattrs) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "dict_new failed"); + goto err; + } - ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value: key = %s", - conf->link_xattr_name); - if (xattrs) - dict_unref (xattrs); - goto err; - } - STACK_WIND_COOKIE (frame, dht_rmdir_lookup_cbk, src, src, - src->fops->lookup, &local->loc, xattrs); + ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s", + conf->link_xattr_name); if (xattrs) - dict_unref (xattrs); - - return 0; + dict_unref(xattrs); + goto err; + } + STACK_WIND_COOKIE(frame, dht_rmdir_lookup_cbk, src, src, src->fops->lookup, + &local->loc, xattrs); + if (xattrs) + dict_unref(xattrs); + + return 0; err: - this_call_cnt = dht_frame_return (readdirp_frame); + this_call_cnt = dht_frame_return(readdirp_frame); - /* Once all the lookups/unlinks etc have returned, proceed to wind - * readdirp on the 
subvol again until no entries are returned. - * This is required if there are more entries than can be returned - * in a single readdirp call. - */ + /* Once all the lookups/unlinks etc have returned, proceed to wind + * readdirp on the subvol again until no entries are returned. + * This is required if there are more entries than can be returned + * in a single readdirp call. + */ - if (is_last_call (this_call_cnt)) - dht_rmdir_readdirp_do (readdirp_frame, this); + if (is_last_call(this_call_cnt)) + dht_rmdir_readdirp_do(readdirp_frame, this); - DHT_STACK_DESTROY (frame); - return 0; + DHT_STACK_DESTROY(frame); + return 0; } - int -dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, - gf_dirent_t *entries, xlator_t *src) +dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this, + gf_dirent_t *entries, xlator_t *src) { - int ret = 0; - int build_ret = 0; - gf_dirent_t *trav = NULL; - call_frame_t *lookup_frame = NULL; - dht_local_t *lookup_local = NULL; - dht_local_t *local = NULL; - dict_t *xattrs = NULL; - dht_conf_t *conf = this->private; - xlator_t *subvol = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - int count = 0; - gf_boolean_t unwind = _gf_false; + int ret = 0; + int build_ret = 0; + gf_dirent_t *trav = NULL; + call_frame_t *lookup_frame = NULL; + dht_local_t *lookup_local = NULL; + dht_local_t *local = NULL; + dict_t *xattrs = NULL; + dht_conf_t *conf = this->private; + xlator_t *subvol = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int count = 0; + gf_boolean_t unwind = _gf_false; + local = frame->local; - local = frame->local; - - list_for_each_entry (trav, &entries->list, list) { - if (strcmp (trav->d_name, ".") == 0) - continue; - if (strcmp (trav->d_name, "..") == 0) - continue; - if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict, - conf->link_xattr_name)) { - count++; - continue; - } - - /* this entry is either a directory which is neither "." nor "..", - or a non directory which is not a linkfile. the directory is to - be treated as non-empty - */ - return 0; - } - - xattrs = dict_new (); - if (!xattrs) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "dict_new failed"); - return -1; + list_for_each_entry(trav, &entries->list, list) + { + if (strcmp(trav->d_name, ".") == 0) + continue; + if (strcmp(trav->d_name, "..") == 0) + continue; + if (check_is_linkfile(NULL, (&trav->d_stat), trav->dict, + conf->link_xattr_name)) { + count++; + continue; } - ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value: key = %s", - conf->link_xattr_name); + /* this entry is either a directory which is neither "." nor "..", + or a non directory which is not a linkfile. 
the directory is to + be treated as non-empty + */ + return 0; + } - if (xattrs) - dict_unref (xattrs); - return -1; - } + xattrs = dict_new(); + if (!xattrs) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "dict_new failed"); + return -1; + } - local->call_cnt = count; - ret = 0; + ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s", + conf->link_xattr_name); - list_for_each_entry (trav, &entries->list, list) { - if (strcmp (trav->d_name, ".") == 0) - continue; - if (strcmp (trav->d_name, "..") == 0) - continue; + if (xattrs) + dict_unref(xattrs); + return -1; + } - lookup_frame = copy_frame (frame); + local->call_cnt = count; + ret = 0; - if (!lookup_frame) { - /* out of memory, let the rmdir fail - (as non-empty, unfortunately) */ - goto err; - } + list_for_each_entry(trav, &entries->list, list) + { + if (strcmp(trav->d_name, ".") == 0) + continue; + if (strcmp(trav->d_name, "..") == 0) + continue; - lookup_local = dht_local_init (lookup_frame, NULL, NULL, - GF_FOP_LOOKUP); - if (!lookup_local) { - goto err; - } + lookup_frame = copy_frame(frame); - lookup_frame->local = lookup_local; - lookup_local->main_frame = frame; - lookup_local->hashed_subvol = src; + if (!lookup_frame) { + /* out of memory, let the rmdir fail + (as non-empty, unfortunately) */ + goto err; + } - build_ret = dht_build_child_loc (this, &lookup_local->loc, - &local->loc, trav->d_name); - if (build_ret != 0) - goto err; + lookup_local = dht_local_init(lookup_frame, NULL, NULL, GF_FOP_LOOKUP); + if (!lookup_local) { + goto err; + } - gf_uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid); + lookup_frame->local = lookup_local; + lookup_local->main_frame = frame; + lookup_local->hashed_subvol = src; - gf_uuid_unparse(lookup_local->loc.gfid, gfid); + build_ret = dht_build_child_loc(this, &lookup_local->loc, &local->loc, + trav->d_name); + if (build_ret != 0) + goto err; - gf_msg_trace (this->name, 0, - "looking up %s on subvolume %s, gfid = %s", - lookup_local->loc.path, src->name, gfid); + gf_uuid_copy(lookup_local->loc.gfid, trav->d_stat.ia_gfid); - subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat, - trav->dict); - if (!subvol) { + gf_uuid_unparse(lookup_local->loc.gfid, gfid); - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_INVALID_LINKFILE, - "Linkfile does not have link subvolume. " - "path = %s, gfid = %s", - lookup_local->loc.path, gfid); + gf_msg_trace(this->name, 0, "looking up %s on subvolume %s, gfid = %s", + lookup_local->loc.path, src->name, gfid); - gf_msg_debug (this->name, 0, - "looking up %s on subvol %s, gfid = %s", - lookup_local->loc.path, src->name, gfid); + subvol = dht_linkfile_subvol(this, NULL, &trav->d_stat, trav->dict); + if (!subvol) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_INVALID_LINKFILE, + "Linkfile does not have link subvolume. 
" + "path = %s, gfid = %s", + lookup_local->loc.path, gfid); - STACK_WIND_COOKIE (lookup_frame, dht_rmdir_lookup_cbk, - src, src, src->fops->lookup, - &lookup_local->loc, xattrs); - } else { - gf_msg_debug (this->name, 0, - "Looking up linkfile target %s on " - " subvol %s, gfid = %s", - lookup_local->loc.path, subvol->name, - gfid); - - STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk, - subvol, subvol->fops->lookup, - &lookup_local->loc, xattrs); - } - ret++; + gf_msg_debug(this->name, 0, "looking up %s on subvol %s, gfid = %s", + lookup_local->loc.path, src->name, gfid); - lookup_frame = NULL; - lookup_local = NULL; + STACK_WIND_COOKIE(lookup_frame, dht_rmdir_lookup_cbk, src, src, + src->fops->lookup, &lookup_local->loc, xattrs); + } else { + gf_msg_debug(this->name, 0, + "Looking up linkfile target %s on " + " subvol %s, gfid = %s", + lookup_local->loc.path, subvol->name, gfid); + + STACK_WIND(lookup_frame, dht_rmdir_cached_lookup_cbk, subvol, + subvol->fops->lookup, &lookup_local->loc, xattrs); } + ret++; - if (xattrs) - dict_unref (xattrs); + lookup_frame = NULL; + lookup_local = NULL; + } - return ret; + if (xattrs) + dict_unref(xattrs); + + return ret; err: - if (xattrs) - dict_unref (xattrs); + if (xattrs) + dict_unref(xattrs); - if (lookup_frame) - DHT_STACK_DESTROY (lookup_frame); + if (lookup_frame) + DHT_STACK_DESTROY(lookup_frame); - /* Handle the case where the wound calls have unwound before the - * loop processing is done - */ + /* Handle the case where the wound calls have unwound before the + * loop processing is done + */ - LOCK (&frame->lock); - { - local->op_ret = -1; - local->op_errno = ENOTEMPTY; + LOCK(&frame->lock); + { + local->op_ret = -1; + local->op_errno = ENOTEMPTY; - local->call_cnt -= (count - ret); - if (!local->call_cnt) - unwind = _gf_true; - } - UNLOCK (&frame->lock); + local->call_cnt -= (count - ret); + if (!local->call_cnt) + unwind = _gf_true; + } + UNLOCK(&frame->lock); - if (!unwind) { - return ret; - } - return 0; + if (!unwind) { + return ret; + } + return 0; } - - /* * No more entries on this subvol. Proceed to the actual rmdir operation. */ void -dht_rmdir_readdirp_done (call_frame_t *readdirp_frame, xlator_t *this) +dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this) { + call_frame_t *main_frame = NULL; + dht_local_t *main_local = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; - call_frame_t *main_frame = NULL; - dht_local_t *main_local = NULL; - dht_local_t *local = NULL; - int this_call_cnt = 0; - - - local = readdirp_frame->local; - main_frame = local->main_frame; - main_local = main_frame->local; - - /* At least one readdirp failed. - * This is a bit hit or miss - if readdirp failed on more than - * one subvol, we don't know which error is returned. - */ - if (local->op_ret == -1) { - main_local->op_ret = local->op_ret; - main_local->op_errno = local->op_errno; - } + local = readdirp_frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; - this_call_cnt = dht_frame_return (main_frame); + /* At least one readdirp failed. + * This is a bit hit or miss - if readdirp failed on more than + * one subvol, we don't know which error is returned. 
+ */ + if (local->op_ret == -1) { + main_local->op_ret = local->op_ret; + main_local->op_errno = local->op_errno; + } - if (is_last_call (this_call_cnt)) - dht_rmdir_do (main_frame, this); + this_call_cnt = dht_frame_return(main_frame); + if (is_last_call(this_call_cnt)) + dht_rmdir_do(main_frame, this); - DHT_STACK_DESTROY (readdirp_frame); + DHT_STACK_DESTROY(readdirp_frame); } - - int -dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, - dict_t *xdata) +dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - xlator_t *src = NULL; - int ret = 0; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *src = NULL; + int ret = 0; + local = frame->local; + prev = cookie; + src = prev; - local = frame->local; - prev = cookie; - src = prev; - - if (op_ret > 2) { - ret = dht_rmdir_is_subvol_empty (frame, this, entries, src); - - switch (ret) { - case 0: /* non linkfiles exist */ - gf_msg_trace (this->name, 0, - "readdir on %s for %s returned %d " - "entries", prev->name, - local->loc.path, op_ret); - local->op_ret = -1; - local->op_errno = ENOTEMPTY; - goto done; - default: - /* @ret number of linkfiles are getting unlinked */ - gf_msg_trace (this->name, 0, - "readdir on %s for %s found %d " - "linkfiles", prev->name, - local->loc.path, ret); - break; - } + if (op_ret > 2) { + ret = dht_rmdir_is_subvol_empty(frame, this, entries, src); + switch (ret) { + case 0: /* non linkfiles exist */ + gf_msg_trace(this->name, 0, + "readdir on %s for %s returned %d " + "entries", + prev->name, local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno = ENOTEMPTY; + goto done; + default: + /* @ret number of linkfiles are getting unlinked */ + gf_msg_trace(this->name, 0, + "readdir on %s for %s found %d " + "linkfiles", + prev->name, local->loc.path, ret); + break; } + } - - if (ret) { - return 0; - } + if (ret) { + return 0; + } done: - /* readdirp failed or no linkto files were found on this subvol */ + /* readdirp failed or no linkto files were found on this subvol */ - dht_rmdir_readdirp_done (frame, this); - return 0; + dht_rmdir_readdirp_done(frame, this); + return 0; } /* Keep sending readdirp on the subvol until it returns no more entries @@ -10746,214 +10095,202 @@ done: */ int -dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this) +dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this) { - dht_local_t *local = NULL; - - local = readdirp_frame->local; - - if (local->op_ret == -1) { - /* there is no point doing another readdirp on this - * subvol . */ - dht_rmdir_readdirp_done (readdirp_frame, this); - return 0; - } - - STACK_WIND_COOKIE (readdirp_frame, dht_rmdir_readdirp_cbk, - local->hashed_subvol, - local->hashed_subvol, - local->hashed_subvol->fops->readdirp, - local->fd, 4096, 0, local->xattr); + dht_local_t *local = NULL; + local = readdirp_frame->local; + if (local->op_ret == -1) { + /* there is no point doing another readdirp on this + * subvol . 
*/ + dht_rmdir_readdirp_done(readdirp_frame, this); return 0; + } -} + STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk, + local->hashed_subvol, local->hashed_subvol, + local->hashed_subvol->fops->readdirp, local->fd, 4096, 0, + local->xattr); + return 0; +} int -dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = -1; - xlator_t *prev = NULL; - int ret = 0; - dht_conf_t *conf = this->private; - dict_t *dict = NULL; - int i = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - dht_local_t *readdirp_local = NULL; - call_frame_t *readdirp_frame = NULL; - int cnt = 0; + dht_local_t *local = NULL; + int this_call_cnt = -1; + xlator_t *prev = NULL; + int ret = 0; + dht_conf_t *conf = this->private; + dict_t *dict = NULL; + int i = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + dht_local_t *readdirp_local = NULL; + call_frame_t *readdirp_frame = NULL; + int cnt = 0; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; + this_call_cnt = dht_frame_return(frame); + if (op_ret == -1) { + gf_uuid_unparse(local->loc.gfid, gfid); - this_call_cnt = dht_frame_return (frame); - if (op_ret == -1) { - gf_uuid_unparse(local->loc.gfid, gfid); - - gf_msg_debug (this->name, op_errno, - "opendir on %s for %s failed, " - "gfid = %s,", - prev->name, local->loc.path, gfid); - if ((op_errno != ENOENT) && (op_errno != ESTALE)) { - local->op_ret = -1; - local->op_errno = op_errno; - } - goto err; + gf_msg_debug(this->name, op_errno, + "opendir on %s for %s failed, " + "gfid = %s,", + prev->name, local->loc.path, gfid); + if ((op_errno != ENOENT) && (op_errno != ESTALE)) { + local->op_ret = -1; + local->op_errno = op_errno; } + goto err; + } - if (!is_last_call (this_call_cnt)) - return 0; - - if (local->op_ret == -1) - goto err; - - fd_bind (fd); - - dict = dict_new (); - if (!dict) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } + if (!is_last_call(this_call_cnt)) + return 0; - ret = dict_set_uint32 (dict, conf->link_xattr_name, 256); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:key = %s", - local->loc.path, conf->link_xattr_name); + if (local->op_ret == -1) + goto err; - cnt = local->call_cnt = conf->subvolume_cnt; + fd_bind(fd); + dict = dict_new(); + if (!dict) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } - /* Create a separate frame per subvol as we might need - * to resend readdirp multiple times to get all the - * entries. - */ + ret = dict_set_uint32(dict, conf->link_xattr_name, 256); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value:key = %s", local->loc.path, + conf->link_xattr_name); - for (i = 0; i < conf->subvolume_cnt; i++) { + cnt = local->call_cnt = conf->subvolume_cnt; - readdirp_frame = copy_frame (frame); + /* Create a separate frame per subvol as we might need + * to resend readdirp multiple times to get all the + * entries. 
+ */ - if (!readdirp_frame) { - cnt--; - /* Reduce the local->call_cnt as well */ - (void) dht_frame_return (frame); - continue; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + readdirp_frame = copy_frame(frame); - readdirp_local = dht_local_init (readdirp_frame, &local->loc, - local->fd, 0); + if (!readdirp_frame) { + cnt--; + /* Reduce the local->call_cnt as well */ + (void)dht_frame_return(frame); + continue; + } - if (!readdirp_local) { - DHT_STACK_DESTROY (readdirp_frame); - cnt--; - /* Reduce the local->call_cnt as well */ - dht_frame_return (frame); - continue; - } - readdirp_local->main_frame = frame; - readdirp_local->op_ret = 0; - readdirp_local->xattr = dict_ref (dict); - /* overload this field to save the subvol info */ - readdirp_local->hashed_subvol = conf->subvolumes[i]; + readdirp_local = dht_local_init(readdirp_frame, &local->loc, local->fd, + 0); - STACK_WIND_COOKIE (readdirp_frame, dht_rmdir_readdirp_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->readdirp, - readdirp_local->fd, 4096, 0, - readdirp_local->xattr); + if (!readdirp_local) { + DHT_STACK_DESTROY(readdirp_frame); + cnt--; + /* Reduce the local->call_cnt as well */ + dht_frame_return(frame); + continue; } + readdirp_local->main_frame = frame; + readdirp_local->op_ret = 0; + readdirp_local->xattr = dict_ref(dict); + /* overload this field to save the subvol info */ + readdirp_local->hashed_subvol = conf->subvolumes[i]; - if (dict) - dict_unref (dict); + STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->readdirp, + readdirp_local->fd, 4096, 0, readdirp_local->xattr); + } - /* Could not wind readdirp to any subvol */ + if (dict) + dict_unref(dict); - if (!cnt) - goto err; + /* Could not wind readdirp to any subvol */ - return 0; + if (!cnt) + goto err; + + return 0; err: - if (is_last_call (this_call_cnt)) { - dht_rmdir_do (frame, this); - } + if (is_last_call(this_call_cnt)) { + dht_rmdir_do(frame, this); + } - return 0; + return 0; } - int -dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - int i = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (this->private, err); + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; - conf = this->private; + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(this->private, err); - local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR); - if (!local) { - op_errno = ENOMEM; - goto err; - } + conf = this->private; - local->call_cnt = conf->subvolume_cnt; - local->op_ret = 0; - local->fop_succeeded = 0; + local = dht_local_init(frame, loc, NULL, GF_FOP_RMDIR); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->flags = flags; + local->call_cnt = conf->subvolume_cnt; + local->op_ret = 0; + local->fop_succeeded = 0; - local->fd = fd_create (local->loc.inode, frame->root->pid); - if (!local->fd) { + local->flags = flags; - op_errno = ENOMEM; - goto err; - } + local->fd = fd_create(local->loc.inode, frame->root->pid); + if (!local->fd) { + 
op_errno = ENOMEM; + goto err; + } - if (flags) { - return dht_rmdir_do (frame, this); - } + if (flags) { + return dht_rmdir_do(frame, this); + } - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_rmdir_opendir_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->opendir, - loc, local->fd, NULL); - } + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_rmdir_opendir_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, loc, local->fd, + NULL); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (rmdir, frame, -1, op_errno, - NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int -dht_entrylk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata); - return 0; + DHT_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata); + return 0; } /* TODO @@ -10961,765 +10298,735 @@ dht_entrylk_cbk (call_frame_t *frame, void *cookie, * as described in the bug 1311002. */ int -dht_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +dht_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK); - if (!local) { - op_errno = ENOMEM; - goto err; - } + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + local = dht_local_init(frame, loc, NULL, GF_FOP_ENTRYLK); + if (!local) { + op_errno = ENOMEM; + goto err; + } - subvol = local->cached_subvol; - if (!subvol) { - gf_uuid_unparse(loc->gfid, gfid); + subvol = local->cached_subvol; + if (!subvol) { + gf_uuid_unparse(loc->gfid, gfid); - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s, " - "gfid = %s", loc->path, gfid); - op_errno = EINVAL; - goto err; - } + gf_msg_debug(this->name, 0, + "no cached subvolume for path=%s, " + "gfid = %s", + loc->path, gfid); + op_errno = EINVAL; + goto err; + } - local->call_cnt = 1; + local->call_cnt = 1; - STACK_WIND (frame, dht_entrylk_cbk, - subvol, subvol->fops->entrylk, - volume, loc, basename, cmd, type, xdata); + STACK_WIND(frame, dht_entrylk_cbk, subvol, subvol->fops->entrylk, volume, + loc, basename, cmd, type, xdata); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(entrylk, frame, -1, op_errno, NULL); - return 0; + return 0; } - int -dht_fentrylk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL); - return 0; + DHT_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, NULL); + return 0; } - int -dht_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +dht_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - char gfid[GF_UUID_BUF_SIZE] = {0}; - + xlator_t *subvol = NULL; + int op_errno = -1; + char gfid[GF_UUID_BUF_SIZE] = {0}; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO(fd->inode, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); - gf_uuid_unparse(fd->inode->gfid, gfid); + gf_uuid_unparse(fd->inode->gfid, gfid); - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_msg_debug (this->name, 0, - "No cached subvolume for fd=%p," - " gfid = %s", fd, gfid); - op_errno = EINVAL; - goto err; - } + subvol = dht_subvol_get_cached(this, fd->inode); + if (!subvol) { + gf_msg_debug(this->name, 0, + "No cached subvolume for fd=%p," + " gfid = %s", + fd, gfid); + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, dht_fentrylk_cbk, - subvol, subvol->fops->fentrylk, - volume, fd, basename, cmd, type, xdata); + STACK_WIND(frame, dht_fentrylk_cbk, subvol, subvol->fops->fentrylk, volume, + fd, basename, cmd, type, xdata); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fentrylk, frame, -1, op_errno, NULL); - return 0; + return 0; } - int32_t -dht_ipc_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - if (op_ret < 0 && op_errno != ENOTCONN) { - local->op_errno = op_errno; - goto unlock; - } - local->op_ret = 0; + LOCK(&frame->lock); + { + if (op_ret < 0 && op_errno != ENOTCONN) { + local->op_errno = op_errno; + goto unlock; } + local->op_ret = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (ipc, frame, local->op_ret, local->op_errno, - NULL); - } + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + DHT_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno, NULL); + } out: - return 0; + return 0; } - int32_t -dht_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +dht_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { - dht_local_t *local = NULL; - int op_errno = EINVAL; - dht_conf_t *conf = NULL; - int call_cnt = 0; - int i = 0; + dht_local_t *local = NULL; + int op_errno = EINVAL; + dht_conf_t *conf = NULL; + int call_cnt = 0; + int i = 0; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); - if (op != GF_IPC_TARGET_UPCALL) - goto wind_default; + if (op != GF_IPC_TARGET_UPCALL) + goto wind_default; - VALIDATE_OR_GOTO (this->private, err); - conf = this->private; + VALIDATE_OR_GOTO(this->private, err); + conf = this->private; - local = dht_local_init (frame, NULL, NULL, GF_FOP_IPC); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = dht_local_init(frame, NULL, NULL, GF_FOP_IPC); + if (!local) { + op_errno = ENOMEM; + goto err; + } - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - if (xdata) { - if (dict_set_int8 (xdata, conf->xattr_name, 0) < 0) - goto err; - } + if (xdata) { + if (dict_set_int8(xdata, conf->xattr_name, 0) < 0) + goto err; + } - for (i = 0; i < call_cnt; i++) { - STACK_WIND (frame, dht_ipc_cbk, conf->subvolumes[i], - conf->subvolumes[i]->fops->ipc, op, xdata); - } + for (i = 0; i < call_cnt; i++) { + STACK_WIND(frame, dht_ipc_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->ipc, op, xdata); + } - return 0; + return 0; err: - DHT_STACK_UNWIND (ipc, frame, -1, op_errno, NULL); + DHT_STACK_UNWIND(ipc, frame, -1, op_errno, NULL); - return 0; + return 0; wind_default: - STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ipc, op, xdata); - return 0; + STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + return 0; } - int -dht_forget (xlator_t *this, inode_t *inode) +dht_forget(xlator_t *this, inode_t *inode) { - uint64_t ctx_int = 0; - dht_inode_ctx_t *ctx = NULL; 
- dht_layout_t *layout = NULL; + uint64_t ctx_int = 0; + dht_inode_ctx_t *ctx = NULL; + dht_layout_t *layout = NULL; - inode_ctx_del (inode, this, &ctx_int); + inode_ctx_del(inode, this, &ctx_int); - if (!ctx_int) - return 0; + if (!ctx_int) + return 0; - ctx = (dht_inode_ctx_t *) (long) ctx_int; + ctx = (dht_inode_ctx_t *)(long)ctx_int; - layout = ctx->layout; - ctx->layout = NULL; - dht_layout_unref (this, layout); - GF_FREE (ctx); + layout = ctx->layout; + ctx->layout = NULL; + dht_layout_unref(this, layout); + GF_FREE(ctx); - return 0; + return 0; } - int -dht_notify (xlator_t *this, int event, void *data, ...) +dht_notify(xlator_t *this, int event, void *data, ...) { - xlator_t *subvol = NULL; - int cnt = -1; - int i = -1; - dht_conf_t *conf = NULL; - int ret = -1; - int propagate = 0; + xlator_t *subvol = NULL; + int cnt = -1; + int i = -1; + dht_conf_t *conf = NULL; + int ret = -1; + int propagate = 0; - int had_heard_from_all = 0; - int have_heard_from_all = 0; - struct timeval time = {0,}; - gf_defrag_info_t *defrag = NULL; - dict_t *dict = NULL; - gf_defrag_type cmd = 0; - dict_t *output = NULL; - va_list ap; - dht_methods_t *methods = NULL; - struct gf_upcall *up_data = NULL; - struct gf_upcall_cache_invalidation *up_ci = NULL; + int had_heard_from_all = 0; + int have_heard_from_all = 0; + struct timeval time = { + 0, + }; + gf_defrag_info_t *defrag = NULL; + dict_t *dict = NULL; + gf_defrag_type cmd = 0; + dict_t *output = NULL; + va_list ap; + dht_methods_t *methods = NULL; + struct gf_upcall *up_data = NULL; + struct gf_upcall_cache_invalidation *up_ci = NULL; - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); + methods = &(conf->methods); - /* had all subvolumes reported status once till now? */ - had_heard_from_all = 1; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->last_event[i]) { - had_heard_from_all = 0; - } + /* had all subvolumes reported status once till now? 
*/ + had_heard_from_all = 1; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->last_event[i]) { + had_heard_from_all = 0; } + } - switch (event) { + switch (event) { case GF_EVENT_CHILD_UP: - subvol = data; + subvol = data; - conf->gen++; + conf->gen++; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - cnt = i; - break; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; } + } - if (cnt == -1) { - gf_msg_debug (this->name, 0, - "got GF_EVENT_CHILD_UP bad " - "subvolume %s", - subvol->name); - break; - } + if (cnt == -1) { + gf_msg_debug(this->name, 0, + "got GF_EVENT_CHILD_UP bad " + "subvolume %s", + subvol->name); + break; + } - gettimeofday (&time, NULL); - LOCK (&conf->subvolume_lock); - { - conf->subvolume_status[cnt] = 1; - conf->last_event[cnt] = event; - conf->subvol_up_time[cnt] = time.tv_sec; - } - UNLOCK (&conf->subvolume_lock); + gettimeofday(&time, NULL); + LOCK(&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 1; + conf->last_event[cnt] = event; + conf->subvol_up_time[cnt] = time.tv_sec; + } + UNLOCK(&conf->subvolume_lock); - /* one of the node came back up, do a stat update */ - dht_get_du_info_for_subvol (this, cnt); + /* one of the node came back up, do a stat update */ + dht_get_du_info_for_subvol(this, cnt); - break; + break; case GF_EVENT_SOME_DESCENDENT_UP: - subvol = data; - conf->gen++; - propagate = 1; + subvol = data; + conf->gen++; + propagate = 1; - break; + break; case GF_EVENT_SOME_DESCENDENT_DOWN: - subvol = data; - propagate = 1; + subvol = data; + propagate = 1; - break; + break; case GF_EVENT_CHILD_DOWN: - subvol = data; - - if (conf->assert_no_child_down) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_CHILD_DOWN, - "Received CHILD_DOWN. Exiting"); - if (conf->defrag) { - gf_defrag_stop (conf, - GF_DEFRAG_STATUS_FAILED, NULL); - } else { - kill (getpid(), SIGTERM); - } - } + subvol = data; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - cnt = i; - break; - } - } - - if (cnt == -1) { - gf_msg_debug (this->name, 0, - "got GF_EVENT_CHILD_DOWN bad " - "subvolume %s", subvol->name); - break; + if (conf->assert_no_child_down) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_CHILD_DOWN, + "Received CHILD_DOWN. 
Exiting"); + if (conf->defrag) { + gf_defrag_stop(conf, GF_DEFRAG_STATUS_FAILED, NULL); + } else { + kill(getpid(), SIGTERM); } + } - LOCK (&conf->subvolume_lock); - { - conf->subvolume_status[cnt] = 0; - conf->last_event[cnt] = event; - conf->subvol_up_time[cnt] = 0; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; } - UNLOCK (&conf->subvolume_lock); + } - for (i = 0; i < conf->subvolume_cnt; i++) - if (conf->last_event[i] != event) - event = GF_EVENT_SOME_DESCENDENT_DOWN; + if (cnt == -1) { + gf_msg_debug(this->name, 0, + "got GF_EVENT_CHILD_DOWN bad " + "subvolume %s", + subvol->name); break; + } - case GF_EVENT_CHILD_CONNECTING: - subvol = data; + LOCK(&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 0; + conf->last_event[cnt] = event; + conf->subvol_up_time[cnt] = 0; + } + UNLOCK(&conf->subvolume_lock); - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - cnt = i; - break; - } - } + for (i = 0; i < conf->subvolume_cnt; i++) + if (conf->last_event[i] != event) + event = GF_EVENT_SOME_DESCENDENT_DOWN; + break; - if (cnt == -1) { - gf_msg_debug (this->name, 0, - "got GF_EVENT_CHILD_CONNECTING" - " bad subvolume %s", - subvol->name); - break; - } + case GF_EVENT_CHILD_CONNECTING: + subvol = data; - LOCK (&conf->subvolume_lock); - { - conf->last_event[cnt] = event; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; } - UNLOCK (&conf->subvolume_lock); + } + if (cnt == -1) { + gf_msg_debug(this->name, 0, + "got GF_EVENT_CHILD_CONNECTING" + " bad subvolume %s", + subvol->name); break; - case GF_EVENT_VOLUME_DEFRAG: - { - if (!conf->defrag) { - return ret; - } - defrag = conf->defrag; + } - dict = data; - va_start (ap, data); - output = va_arg (ap, dict_t*); + LOCK(&conf->subvolume_lock); + { + conf->last_event[cnt] = event; + } + UNLOCK(&conf->subvolume_lock); - ret = dict_get_int32 (dict, "rebalance-command", - (int32_t*)&cmd); - if (ret) { - va_end (ap); - return ret; - } - LOCK (&defrag->lock); - { - if (defrag->is_exiting) - goto unlock; - if ((cmd == GF_DEFRAG_CMD_STATUS) || - (cmd == GF_DEFRAG_CMD_STATUS_TIER) || - (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) - gf_defrag_status_get (conf, output); - else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) - gf_defrag_start_detach_tier(defrag); - else if (cmd == GF_DEFRAG_CMD_DETACH_START) - defrag->cmd = GF_DEFRAG_CMD_DETACH_START; - else if (cmd == GF_DEFRAG_CMD_STOP || - cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER || - cmd == GF_DEFRAG_CMD_DETACH_STOP) - gf_defrag_stop (conf, - GF_DEFRAG_STATUS_STOPPED, output); - else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) - ret = gf_defrag_pause_tier (this, defrag); - else if (cmd == GF_DEFRAG_CMD_RESUME_TIER) - ret = gf_defrag_resume_tier (this, defrag); - } -unlock: - UNLOCK (&defrag->lock); - va_end (ap); + break; + case GF_EVENT_VOLUME_DEFRAG: { + if (!conf->defrag) { return ret; - break; + } + defrag = conf->defrag; + + dict = data; + va_start(ap, data); + output = va_arg(ap, dict_t *); + + ret = dict_get_int32(dict, "rebalance-command", (int32_t *)&cmd); + if (ret) { + va_end(ap); + return ret; + } + LOCK(&defrag->lock); + { + if (defrag->is_exiting) + goto unlock; + if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) + gf_defrag_status_get(conf, output); + else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) + gf_defrag_start_detach_tier(defrag); + else if (cmd == GF_DEFRAG_CMD_DETACH_START) + 
defrag->cmd = GF_DEFRAG_CMD_DETACH_START; + else if (cmd == GF_DEFRAG_CMD_STOP || + cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER || + cmd == GF_DEFRAG_CMD_DETACH_STOP) + gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output); + else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) + ret = gf_defrag_pause_tier(this, defrag); + else if (cmd == GF_DEFRAG_CMD_RESUME_TIER) + ret = gf_defrag_resume_tier(this, defrag); + } + unlock: + UNLOCK(&defrag->lock); + va_end(ap); + return ret; + break; } case GF_EVENT_UPCALL: - up_data = (struct gf_upcall *)data; - if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION) - break; - up_ci = (struct gf_upcall_cache_invalidation *)up_data->data; - - /* Since md-cache will be aggressively filtering lookups, - * the stale layout issue will be more pronounced. Hence - * when a layout xattr is changed by the rebalance process - * notify all the md-cache clients to invalidate the existing - * stat cache and send the lookup next time*/ - if (up_ci->dict && dict_get (up_ci->dict, conf->xattr_name)) - up_ci->flags |= UP_EXPLICIT_LOOKUP; - - /* TODO: Instead of invalidating iatt, update the new - * hashed/cached subvolume in dht inode_ctx */ - if (IS_DHT_LINKFILE_MODE (&up_ci->stat)) - up_ci->flags |= UP_EXPLICIT_LOOKUP; - - propagate = 1; + up_data = (struct gf_upcall *)data; + if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION) break; + up_ci = (struct gf_upcall_cache_invalidation *)up_data->data; + + /* Since md-cache will be aggressively filtering lookups, + * the stale layout issue will be more pronounced. Hence + * when a layout xattr is changed by the rebalance process + * notify all the md-cache clients to invalidate the existing + * stat cache and send the lookup next time*/ + if (up_ci->dict && dict_get(up_ci->dict, conf->xattr_name)) + up_ci->flags |= UP_EXPLICIT_LOOKUP; + + /* TODO: Instead of invalidating iatt, update the new + * hashed/cached subvolume in dht inode_ctx */ + if (IS_DHT_LINKFILE_MODE(&up_ci->stat)) + up_ci->flags |= UP_EXPLICIT_LOOKUP; + + propagate = 1; + break; default: - propagate = 1; - break; - } - + propagate = 1; + break; + } + + /* have all subvolumes reported status once by now? */ + have_heard_from_all = 1; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->last_event[i]) + have_heard_from_all = 0; + } + + /* if all subvols have reported status, no need to hide anything + or wait for anything else. Just propagate blindly */ + if (have_heard_from_all) { + propagate = 1; + } + + if (!had_heard_from_all && have_heard_from_all) { + /* This is the first event which completes aggregation + of events from all subvolumes. If at least one subvol + had come up, propagate CHILD_UP, but only this time + */ + event = GF_EVENT_CHILD_DOWN; - /* have all subvolumes reported status once by now? */ - have_heard_from_all = 1; for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->last_event[i]) - have_heard_from_all = 0; - } - - /* if all subvols have reported status, no need to hide anything - or wait for anything else. Just propagate blindly */ - if (have_heard_from_all) { - propagate = 1; + if (conf->last_event[i] == GF_EVENT_CHILD_UP) { + event = GF_EVENT_CHILD_UP; + break; + } + if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) { + event = GF_EVENT_CHILD_CONNECTING; + /* continue to check other events for CHILD_UP */ + } } - - if (!had_heard_from_all && have_heard_from_all) { - /* This is the first event which completes aggregation - of events from all subvolumes. 
If at least one subvol - had come up, propagate CHILD_UP, but only this time - */ - event = GF_EVENT_CHILD_DOWN; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->last_event[i] == GF_EVENT_CHILD_UP) { - event = GF_EVENT_CHILD_UP; - break; - } - - if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) { - event = GF_EVENT_CHILD_CONNECTING; - /* continue to check other events for CHILD_UP */ - } - } - - /* Rebalance is started with assert_no_child_down. So we do - * not need to handle CHILD_DOWN event here. - * - * If there is a graph switch, we should not restart the - * rebalance daemon. Use 'run_defrag' to indicate if the - * thread has already started. - */ - if (conf->defrag && !run_defrag) { - if (methods->migration_needed(this)) { - run_defrag = 1; - ret = gf_thread_create(&conf->defrag->th, - NULL, - gf_defrag_start, this, - "dhtdg"); - if (ret) { - GF_FREE (conf->defrag); - conf->defrag = NULL; - kill (getpid(), SIGTERM); - } - } + /* Rebalance is started with assert_no_child_down. So we do + * not need to handle CHILD_DOWN event here. + * + * If there is a graph switch, we should not restart the + * rebalance daemon. Use 'run_defrag' to indicate if the + * thread has already started. + */ + if (conf->defrag && !run_defrag) { + if (methods->migration_needed(this)) { + run_defrag = 1; + ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, + this, "dhtdg"); + if (ret) { + GF_FREE(conf->defrag); + conf->defrag = NULL; + kill(getpid(), SIGTERM); } + } } + } - ret = 0; - if (propagate) - ret = default_notify (this, event, data); + ret = 0; + if (propagate) + ret = default_notify(this, event, data); out: - return ret; + return ret; } int -dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout) +dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this, dht_layout_t **layout) { - dht_inode_ctx_t *ctx = NULL; - int ret = -1; + dht_inode_ctx_t *ctx = NULL; + int ret = -1; - ret = dht_inode_ctx_get (inode, this, &ctx); + ret = dht_inode_ctx_get(inode, this, &ctx); - if (!ret && ctx) { - if (ctx->layout) { - if (layout) - *layout = ctx->layout; - ret = 0; - } else { - ret = -1; - } + if (!ret && ctx) { + if (ctx->layout) { + if (layout) + *layout = ctx->layout; + ret = 0; + } else { + ret = -1; } + } - return ret; + return ret; } void -dht_log_new_layout_for_dir_selfheal (xlator_t *this, loc_t *loc, - dht_layout_t *layout) -{ +dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc, + dht_layout_t *layout) +{ + char string[2048] = {0}; + char *output_string = NULL; + int len = 0; + int off = 0; + int i = 0; + gf_loglevel_t log_level = gf_log_get_loglevel(); + int ret = 0; + + if (log_level < GF_LOG_INFO) + return; - char string[2048] = {0}; - char *output_string = NULL; - int len = 0; - int off = 0; - int i = 0; - gf_loglevel_t log_level = gf_log_get_loglevel(); - int ret = 0; + if (!layout) + return; - if (log_level < GF_LOG_INFO) - return; + if (!layout->cnt) + return; - if (!layout) - return; + if (!loc) + return; - if (!layout->cnt) - return; + if (!loc->path) + return; - if (!loc) - return; + ret = snprintf(string, sizeof(string), "Setting layout of %s with ", + loc->path); - if (!loc->path) - return; + if (ret < 0) + return; - ret = snprintf (string, sizeof (string), "Setting layout of %s with ", - loc->path); + len += ret; + + /* Calculation of total length of the string required to calloc + * output_string. Log includes subvolume-name, start-range, end-range and + * err value. 
+ * + * This log will help to debug cases where: + * a) Different processes set different layout of a directory. + * b) Error captured in lookup, which will be filled in layout->err + * (like ENOENT, ESTALE etc) + */ + + for (i = 0; i < layout->cnt; i++) { + ret = snprintf(string, sizeof(string), + "[Subvol_name: %s, Err: %d , Start: " + "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32 + " ], ", + layout->list[i].xlator->name, layout->list[i].err, + layout->list[i].start, layout->list[i].stop, + layout->list[i].commit_hash); if (ret < 0) - return; + return; len += ret; + } - /* Calculation of total length of the string required to calloc - * output_string. Log includes subvolume-name, start-range, end-range and - * err value. - * - * This log will help to debug cases where: - * a) Different processes set different layout of a directory. - * b) Error captured in lookup, which will be filled in layout->err - * (like ENOENT, ESTALE etc) - */ - - for (i = 0; i < layout->cnt; i++) { - - ret = snprintf (string, sizeof (string), - "[Subvol_name: %s, Err: %d , Start: " - "%"PRIu32 " , Stop: %"PRIu32 " , Hash: %" - PRIu32 " ], ", - layout->list[i].xlator->name, - layout->list[i].err, layout->list[i].start, - layout->list[i].stop, - layout->list[i].commit_hash); + len++; - if (ret < 0) - return; + output_string = GF_MALLOC(len + 1, gf_common_mt_char); - len += ret; - - } + if (!output_string) + return; - len++; + ret = snprintf(output_string, len + 1, "Setting layout of %s with ", + loc->path); - output_string = GF_MALLOC (len + 1, gf_common_mt_char); + if (ret < 0) + goto err; - if (!output_string) - return; + off += ret; - ret = snprintf (output_string, len + 1, "Setting layout of %s with ", - loc->path); + for (i = 0; i < layout->cnt; i++) { + ret = snprintf(output_string + off, len - off, + "[Subvol_name: %s, Err: %d , Start: " + "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32 + " ], ", + layout->list[i].xlator->name, layout->list[i].err, + layout->list[i].start, layout->list[i].stop, + layout->list[i].commit_hash); if (ret < 0) - goto err; + goto err; off += ret; + } - - for (i = 0; i < layout->cnt; i++) { - - ret = snprintf (output_string + off, len - off, - "[Subvol_name: %s, Err: %d , Start: " - "%"PRIu32 " , Stop: %"PRIu32 " , Hash: %" - PRIu32 " ], ", - layout->list[i].xlator->name, - layout->list[i].err, layout->list[i].start, - layout->list[i].stop, - layout->list[i].commit_hash); - - if (ret < 0) - goto err; - - off += ret; - - } - - gf_msg (this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_FIXED_LAYOUT, - "%s", output_string); + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_FIXED_LAYOUT, "%s", + output_string); err: - GF_FREE (output_string); + GF_FREE(output_string); } -int32_t dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local) +int32_t +dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local) { - int ret = -1; + int ret = -1; - if (!local) - goto out; + if (!local) + goto out; - local->rebalance.target_node = - dht_subvol_get_hashed (this, &local->loc); + local->rebalance.target_node = dht_subvol_get_hashed(this, &local->loc); - if (local->rebalance.target_node) - ret = 0; + if (local->rebalance.target_node) + ret = 0; out: - return ret; + return ret; } -int32_t dht_migration_needed(xlator_t *this) +int32_t +dht_migration_needed(xlator_t *this) { - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; + gf_defrag_info_t *defrag = NULL; + dht_conf_t *conf = NULL; + int ret = 0; - conf = this->private; + conf = this->private; - 
GF_VALIDATE_OR_GOTO ("dht", conf, out); - GF_VALIDATE_OR_GOTO ("dht", conf->defrag, out); + GF_VALIDATE_OR_GOTO("dht", conf, out); + GF_VALIDATE_OR_GOTO("dht", conf->defrag, out); - defrag = conf->defrag; + defrag = conf->defrag; - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; + if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && + (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) + ret = 1; out: - return ret; + return ret; } - - /* This function should not be called more then once during a FOP handling path. It is valid only for for ops on files */ -int32_t dht_set_local_rebalance (xlator_t *this, dht_local_t *local, - struct iatt *stbuf, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) +int32_t +dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { + if (!local) + return -1; - if (!local) - return -1; - - if (local->rebalance.set) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_REBAL_STRUCT_SET, - "local->rebalance already set"); - } - + if (local->rebalance.set) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REBAL_STRUCT_SET, + "local->rebalance already set"); + } - if (stbuf) - memcpy (&local->rebalance.stbuf, stbuf, sizeof (struct iatt)); + if (stbuf) + memcpy(&local->rebalance.stbuf, stbuf, sizeof(struct iatt)); - if (prebuf) - memcpy (&local->rebalance.prebuf, prebuf, sizeof (struct iatt)); + if (prebuf) + memcpy(&local->rebalance.prebuf, prebuf, sizeof(struct iatt)); - if (postbuf) - memcpy (&local->rebalance.postbuf, postbuf, - sizeof (struct iatt)); + if (postbuf) + memcpy(&local->rebalance.postbuf, postbuf, sizeof(struct iatt)); - if (xdata) - local->rebalance.xdata = dict_ref (xdata); + if (xdata) + local->rebalance.xdata = dict_ref(xdata); - local->rebalance.set = 1; + local->rebalance.set = 1; - return 0; + return 0; } gf_boolean_t -dht_is_tier_xlator (xlator_t *this) +dht_is_tier_xlator(xlator_t *this) { - - if (strcmp (this->type, "cluster/tier") == 0) - return _gf_true; - return _gf_false; + if (strcmp(this->type, "cluster/tier") == 0) + return _gf_true; + return _gf_false; } int32_t -dht_release (xlator_t *this, fd_t *fd) +dht_release(xlator_t *this, fd_t *fd) { - return dht_fd_ctx_destroy (this, fd); + return dht_fd_ctx_destroy(this, fd); } int -dht_remove_stale_linkto (void *data) +dht_remove_stale_linkto(void *data) { - call_frame_t *frame = NULL; - dht_local_t *local = NULL; - xlator_t *this = NULL; - dict_t *xdata_in = NULL; - int ret = 0; + call_frame_t *frame = NULL; + dht_local_t *local = NULL; + xlator_t *this = NULL; + dict_t *xdata_in = NULL; + int ret = 0; - GF_VALIDATE_OR_GOTO ("dht", data, out); - - frame = data; - local = frame->local; - this = frame->this; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", local, out); - GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out); - - xdata_in = dict_new (); - if (!xdata_in) - goto out; + GF_VALIDATE_OR_GOTO("dht", data, out); - ret = dht_fill_dict_to_avoid_unlink_of_migrating_file (xdata_in); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, 0, - "Failed to set keys for stale linkto" - "deletion on path %s", local->loc.path); - goto out; - } + frame = data; + local = frame->local; + this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", local, out); + GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out); - ret = syncop_unlink (local->link_subvol, &local->loc, xdata_in, NULL); - if 
(ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, 0, - "Removal of linkto failed" - " on path %s at subvol %s", - local->loc.path, local->link_subvol->name); + xdata_in = dict_new(); + if (!xdata_in) + goto out; - } + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, 0, + "Failed to set keys for stale linkto" + "deletion on path %s", + local->loc.path); + goto out; + } + + ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, 0, + "Removal of linkto failed" + " on path %s at subvol %s", + local->loc.path, local->link_subvol->name); + } out: - if (xdata_in) - dict_unref (xdata_in); - return ret; + if (xdata_in) + dict_unref(xdata_in); + return ret; } int -dht_remove_stale_linkto_cbk (int ret, call_frame_t *sync_frame, void *data) +dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data) { - DHT_STACK_DESTROY (sync_frame); - return 0; + DHT_STACK_DESTROY(sync_frame); + return 0; } diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index f109615e52c..13eaabae1c1 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - /* TODO: add NS locking */ #include "glusterfs.h" @@ -20,493 +19,477 @@ #include #include "events.h" - int -dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct statvfs *statvfs, - dict_t *xdata) +dht_du_info_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct statvfs *statvfs, dict_t *xdata) { - dht_conf_t *conf = NULL; - xlator_t *prev = NULL; - int this_call_cnt = 0; - int i = 0; - double percent = 0; - double percent_inodes = 0; - uint64_t bytes = 0; - uint32_t bpc; /* blocks per chunk */ - uint32_t chunks = 0; - - conf = this->private; - prev = cookie; - - if (op_ret == -1 || !statvfs) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_GET_DISK_INFO_ERROR, - "failed to get disk info from %s", prev->name); - goto out; - } - - if (statvfs->f_blocks) { - percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; - bytes = (statvfs->f_bavail * statvfs->f_frsize); - /* - * A 32-bit count of 1MB chunks allows a maximum brick size of - * ~4PB. It's possible that we could see a single local FS - * bigger than that some day, but this code is likely to be - * irrelevant by then. Meanwhile, it's more important to keep - * the chunk size small so the layout-calculation code that - * uses this value can be tested on normal machines. - */ - bpc = (1 << 20) / statvfs->f_bsize; - chunks = (statvfs->f_blocks + bpc - 1) / bpc; - } - - if (statvfs->f_files) { - percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files; - } else { - /* - * Set percent inodes to 100 for dynamically allocated inode - * filesystems. The rationale is that distribute need not - * worry about total inodes; rather, let the 'create()' be - * scheduled on the hashed subvol regardless of the total - * inodes. 
- */ - percent_inodes = 100; - } - - LOCK (&conf->subvolume_lock); - { - for (i = 0; i < conf->subvolume_cnt; i++) - if (prev == conf->subvolumes[i]) { - conf->du_stats[i].avail_percent = percent; - conf->du_stats[i].avail_space = bytes; - conf->du_stats[i].avail_inodes = percent_inodes; - conf->du_stats[i].chunks = chunks; - conf->du_stats[i].total_blocks = statvfs->f_blocks; - conf->du_stats[i].avail_blocks = statvfs->f_bavail; - conf->du_stats[i].frsize = statvfs->f_frsize; - - gf_msg_debug (this->name, 0, - "subvolume '%s': avail_percent " - "is: %.2f and avail_space " - "is: %" PRIu64" and avail_inodes" - " is: %.2f", - prev->name, - conf->du_stats[i].avail_percent, - conf->du_stats[i].avail_space, - conf->du_stats[i].avail_inodes); - break; /* no point in looping further */ - } - } - UNLOCK (&conf->subvolume_lock); + dht_conf_t *conf = NULL; + xlator_t *prev = NULL; + int this_call_cnt = 0; + int i = 0; + double percent = 0; + double percent_inodes = 0; + uint64_t bytes = 0; + uint32_t bpc; /* blocks per chunk */ + uint32_t chunks = 0; + + conf = this->private; + prev = cookie; + + if (op_ret == -1 || !statvfs) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_GET_DISK_INFO_ERROR, "failed to get disk info from %s", + prev->name); + goto out; + } + + if (statvfs->f_blocks) { + percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; + bytes = (statvfs->f_bavail * statvfs->f_frsize); + /* + * A 32-bit count of 1MB chunks allows a maximum brick size of + * ~4PB. It's possible that we could see a single local FS + * bigger than that some day, but this code is likely to be + * irrelevant by then. Meanwhile, it's more important to keep + * the chunk size small so the layout-calculation code that + * uses this value can be tested on normal machines. + */ + bpc = (1 << 20) / statvfs->f_bsize; + chunks = (statvfs->f_blocks + bpc - 1) / bpc; + } + + if (statvfs->f_files) { + percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files; + } else { + /* + * Set percent inodes to 100 for dynamically allocated inode + * filesystems. The rationale is that distribute need not + * worry about total inodes; rather, let the 'create()' be + * scheduled on the hashed subvol regardless of the total + * inodes. 
+ */ + percent_inodes = 100; + } + + LOCK(&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) + if (prev == conf->subvolumes[i]) { + conf->du_stats[i].avail_percent = percent; + conf->du_stats[i].avail_space = bytes; + conf->du_stats[i].avail_inodes = percent_inodes; + conf->du_stats[i].chunks = chunks; + conf->du_stats[i].total_blocks = statvfs->f_blocks; + conf->du_stats[i].avail_blocks = statvfs->f_bavail; + conf->du_stats[i].frsize = statvfs->f_frsize; + + gf_msg_debug(this->name, 0, + "subvolume '%s': avail_percent " + "is: %.2f and avail_space " + "is: %" PRIu64 + " and avail_inodes" + " is: %.2f", + prev->name, conf->du_stats[i].avail_percent, + conf->du_stats[i].avail_space, + conf->du_stats[i].avail_inodes); + break; /* no point in looping further */ + } + } + UNLOCK(&conf->subvolume_lock); out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_DESTROY (frame); + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) + DHT_STACK_DESTROY(frame); - return 0; + return 0; } int -dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx) +dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx) { - dht_conf_t *conf = NULL; - call_frame_t *statfs_frame = NULL; - dht_local_t *statfs_local = NULL; - call_pool_t *pool = NULL; - loc_t tmp_loc = {0,}; - - conf = this->private; - pool = this->ctx->pool; - - statfs_frame = create_frame (this, pool); - if (!statfs_frame) { - goto err; - } - - /* local->fop value is not used in this case */ - statfs_local = dht_local_init (statfs_frame, NULL, NULL, - GF_FOP_MAXVALUE); - if (!statfs_local) { - goto err; - } - - /* make it root gfid, should be enough to get the proper info back */ - tmp_loc.gfid[15] = 1; - - statfs_local->call_cnt = 1; - STACK_WIND_COOKIE (statfs_frame, dht_du_info_cbk, - conf->subvolumes[subvol_idx], - conf->subvolumes[subvol_idx], - conf->subvolumes[subvol_idx]->fops->statfs, - &tmp_loc, NULL); - - return 0; + dht_conf_t *conf = NULL; + call_frame_t *statfs_frame = NULL; + dht_local_t *statfs_local = NULL; + call_pool_t *pool = NULL; + loc_t tmp_loc = { + 0, + }; + + conf = this->private; + pool = this->ctx->pool; + + statfs_frame = create_frame(this, pool); + if (!statfs_frame) { + goto err; + } + + /* local->fop value is not used in this case */ + statfs_local = dht_local_init(statfs_frame, NULL, NULL, GF_FOP_MAXVALUE); + if (!statfs_local) { + goto err; + } + + /* make it root gfid, should be enough to get the proper info back */ + tmp_loc.gfid[15] = 1; + + statfs_local->call_cnt = 1; + STACK_WIND_COOKIE( + statfs_frame, dht_du_info_cbk, conf->subvolumes[subvol_idx], + conf->subvolumes[subvol_idx], + conf->subvolumes[subvol_idx]->fops->statfs, &tmp_loc, NULL); + + return 0; err: - if (statfs_frame) - DHT_STACK_DESTROY (statfs_frame); + if (statfs_frame) + DHT_STACK_DESTROY(statfs_frame); - return -1; + return -1; } int -dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc) +dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc) { - int i = 0; - int ret = -1; - dht_conf_t *conf = NULL; - call_frame_t *statfs_frame = NULL; - dht_local_t *statfs_local = NULL; - struct timeval tv = {0,}; - loc_t tmp_loc = {0,}; - - conf = this->private; - - gettimeofday (&tv, NULL); - - /* make it root gfid, should be enough to get the proper - info back */ - tmp_loc.gfid[15] = 1; - - if (tv.tv_sec > (conf->refresh_interval - + conf->last_stat_fetch.tv_sec)) { - - statfs_frame = copy_frame (frame); - if (!statfs_frame) { - goto err; - } - - 
/* In this case, 'local->fop' is not used */ - statfs_local = dht_local_init (statfs_frame, loc, NULL, - GF_FOP_MAXVALUE); - if (!statfs_local) { - goto err; - } - - statfs_local->params = dict_new (); - if (!statfs_local->params) - goto err; - - ret = dict_set_int8 (statfs_local->params, - GF_INTERNAL_IGNORE_DEEM_STATFS, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set " - GF_INTERNAL_IGNORE_DEEM_STATFS" in dict"); - goto err; - } + int i = 0; + int ret = -1; + dht_conf_t *conf = NULL; + call_frame_t *statfs_frame = NULL; + dht_local_t *statfs_local = NULL; + struct timeval tv = { + 0, + }; + loc_t tmp_loc = { + 0, + }; + + conf = this->private; + + gettimeofday(&tv, NULL); + + /* make it root gfid, should be enough to get the proper + info back */ + tmp_loc.gfid[15] = 1; + + if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) { + statfs_frame = copy_frame(frame); + if (!statfs_frame) { + goto err; + } + + /* In this case, 'local->fop' is not used */ + statfs_local = dht_local_init(statfs_frame, loc, NULL, GF_FOP_MAXVALUE); + if (!statfs_local) { + goto err; + } + + statfs_local->params = dict_new(); + if (!statfs_local->params) + goto err; + + ret = dict_set_int8(statfs_local->params, + GF_INTERNAL_IGNORE_DEEM_STATFS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); + goto err; + } + + statfs_local->call_cnt = conf->subvolume_cnt; + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND_COOKIE(statfs_frame, dht_du_info_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, &tmp_loc, + statfs_local->params); + } - statfs_local->call_cnt = conf->subvolume_cnt; - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND_COOKIE (statfs_frame, dht_du_info_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->statfs, - &tmp_loc, statfs_local->params); - } - - conf->last_stat_fetch.tv_sec = tv.tv_sec; - } - return 0; + conf->last_stat_fetch.tv_sec = tv.tv_sec; + } + return 0; err: - if (statfs_frame) - DHT_STACK_DESTROY (statfs_frame); + if (statfs_frame) + DHT_STACK_DESTROY(statfs_frame); - return -1; + return -1; } - gf_boolean_t -dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) +dht_is_subvol_filled(xlator_t *this, xlator_t *subvol) { - int i = 0; - char vol_name[256]; - dht_conf_t *conf = NULL; - gf_boolean_t subvol_filled_inodes = _gf_false; - gf_boolean_t subvol_filled_space = _gf_false; - gf_boolean_t is_subvol_filled = _gf_false; - double usage = 0; - - conf = this->private; - - /* Check for values above specified percent or free disk */ - LOCK (&conf->subvolume_lock); - { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - if (conf->disk_unit == 'p') { - if (conf->du_stats[i].avail_percent < - conf->min_free_disk) { - subvol_filled_space = _gf_true; - break; - } - - } else { - if (conf->du_stats[i].avail_space < - conf->min_free_disk) { - subvol_filled_space = _gf_true; - break; - } - } - if (conf->du_stats[i].avail_inodes < - conf->min_free_inodes) { - subvol_filled_inodes = _gf_true; - break; - } - } - } - } - UNLOCK (&conf->subvolume_lock); - - if (subvol_filled_space && conf->subvolume_status[i]) { - if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { - usage = 100 - conf->du_stats[i].avail_percent; - - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_SUBVOL_INSUFF_SPACE, - "disk space on subvolume '%s' is 
getting " - "full (%.2f %%), consider adding more bricks", - subvol->name, usage); - - strncpy(vol_name, this->name, sizeof(vol_name)); - vol_name[(strlen(this->name)-4)] = '\0'; - - gf_event(EVENT_DHT_DISK_USAGE, - "volume=%s;subvol=%s;usage=%.2f %%", - vol_name, subvol->name, usage); - } - } - - if (subvol_filled_inodes && conf->subvolume_status[i]) { - if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { - usage = 100 - conf->du_stats[i].avail_inodes; - gf_msg (this->name, GF_LOG_CRITICAL, 0, - DHT_MSG_SUBVOL_INSUFF_INODES, - "inodes on subvolume '%s' are at " - "(%.2f %%), consider adding more bricks", - subvol->name, usage); - - strncpy(vol_name, this->name, sizeof(vol_name)); - vol_name[(strlen(this->name)-4)] = '\0'; - - gf_event(EVENT_DHT_INODES_USAGE, - "volume=%s;subvol=%s;usage=%.2f %%", - vol_name, subvol->name, usage); - } - } - - is_subvol_filled = (subvol_filled_space || subvol_filled_inodes); - - return is_subvol_filled; -} + int i = 0; + char vol_name[256]; + dht_conf_t *conf = NULL; + gf_boolean_t subvol_filled_inodes = _gf_false; + gf_boolean_t subvol_filled_space = _gf_false; + gf_boolean_t is_subvol_filled = _gf_false; + double usage = 0; + + conf = this->private; + + /* Check for values above specified percent or free disk */ + LOCK(&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + if (conf->disk_unit == 'p') { + if (conf->du_stats[i].avail_percent < conf->min_free_disk) { + subvol_filled_space = _gf_true; + break; + } + + } else { + if (conf->du_stats[i].avail_space < conf->min_free_disk) { + subvol_filled_space = _gf_true; + break; + } + } + if (conf->du_stats[i].avail_inodes < conf->min_free_inodes) { + subvol_filled_inodes = _gf_true; + break; + } + } + } + } + UNLOCK(&conf->subvolume_lock); + if (subvol_filled_space && conf->subvolume_status[i]) { + if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { + usage = 100 - conf->du_stats[i].avail_percent; -/*Get the best subvolume to create the file in*/ -xlator_t * -dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, - dht_local_t *local) -{ - xlator_t *avail_subvol = NULL; - dht_conf_t *conf = NULL; - dht_layout_t *layout = NULL; - loc_t *loc = NULL; + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE, + "disk space on subvolume '%s' is getting " + "full (%.2f %%), consider adding more bricks", + subvol->name, usage); - conf = this->private; - if (!local) - goto out; - loc = &local->loc; - if (!local->layout) { - layout = dht_layout_get (this, loc->parent); - - if (!layout) { - gf_msg_debug (this->name, 0, - "Missing layout. 
path=%s," - " parent gfid = %s", loc->path, - uuid_utoa (loc->parent->gfid)); - goto out; - } - } else { - layout = dht_layout_ref (this, local->layout); - } + strncpy(vol_name, this->name, sizeof(vol_name)); + vol_name[(strlen(this->name) - 4)] = '\0'; - LOCK (&conf->subvolume_lock); - { - avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL, - layout, 0); - if(!avail_subvol) - { - avail_subvol = dht_subvol_maxspace_nonzeroinode(this, - subvol, - layout); - } + gf_event(EVENT_DHT_DISK_USAGE, "volume=%s;subvol=%s;usage=%.2f %%", + vol_name, subvol->name, usage); + } + } + + if (subvol_filled_inodes && conf->subvolume_status[i]) { + if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { + usage = 100 - conf->du_stats[i].avail_inodes; + gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_SUBVOL_INSUFF_INODES, + "inodes on subvolume '%s' are at " + "(%.2f %%), consider adding more bricks", + subvol->name, usage); + + strncpy(vol_name, this->name, sizeof(vol_name)); + vol_name[(strlen(this->name) - 4)] = '\0'; + + gf_event(EVENT_DHT_INODES_USAGE, + "volume=%s;subvol=%s;usage=%.2f %%", vol_name, + subvol->name, usage); + } + } - } - UNLOCK (&conf->subvolume_lock); -out: - if (!avail_subvol) { - gf_msg_debug (this->name, 0, - "No subvolume has enough free space \ - and/or inodes to create"); - avail_subvol = subvol; - } + is_subvol_filled = (subvol_filled_space || subvol_filled_inodes); - if (layout) - dht_layout_unref (this, layout); - return avail_subvol; + return is_subvol_filled; } -static inline -int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this, xlator_t *ignore, - dht_layout_t *layout) +/*Get the best subvolume to create the file in*/ +xlator_t * +dht_free_disk_available_subvol(xlator_t *this, xlator_t *subvol, + dht_local_t *local) { - int ret = -1; - int i = 0; - - if (!this || !layout) - goto out; - - /* this check is meant for rebalance process. The source of the file - * should be ignored for space check */ - if (this == ignore) { - goto out; + xlator_t *avail_subvol = NULL; + dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + loc_t *loc = NULL; + + conf = this->private; + if (!local) + goto out; + loc = &local->loc; + if (!local->layout) { + layout = dht_layout_get(this, loc->parent); + + if (!layout) { + gf_msg_debug(this->name, 0, + "Missing layout. path=%s," + " parent gfid = %s", + loc->path, uuid_utoa(loc->parent->gfid)); + goto out; + } + } else { + layout = dht_layout_ref(this, local->layout); + } + + LOCK(&conf->subvolume_lock); + { + avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL, + layout, 0); + if (!avail_subvol) { + avail_subvol = dht_subvol_maxspace_nonzeroinode(this, subvol, + layout); } + } + UNLOCK(&conf->subvolume_lock); +out: + if (!avail_subvol) { + gf_msg_debug(this->name, 0, + "No subvolume has enough free space \ + and/or inodes to create"); + avail_subvol = subvol; + } + if (layout) + dht_layout_unref(this, layout); + return avail_subvol; +} - /* check if subvol has layout errors, before selecting it */ - for (i = 0; i < layout->cnt; i++) { - if (!strcmp (layout->list[i].xlator->name, this->name) && - (layout->list[i].err != 0)) { - ret = -1; - goto out; - } +static inline int32_t +dht_subvol_has_err(dht_conf_t *conf, xlator_t *this, xlator_t *ignore, + dht_layout_t *layout) +{ + int ret = -1; + int i = 0; + + if (!this || !layout) + goto out; + + /* this check is meant for rebalance process. 
The source of the file + * should be ignored for space check */ + if (this == ignore) { + goto out; + } + + /* check if subvol has layout errors, before selecting it */ + for (i = 0; i < layout->cnt; i++) { + if (!strcmp(layout->list[i].xlator->name, this->name) && + (layout->list[i].err != 0)) { + ret = -1; + goto out; } + } - /* discard decommissioned subvol */ - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] && - conf->decommissioned_bricks[i] == this) { - ret = -1; - goto out; - } - } + /* discard decommissioned subvol */ + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] && + conf->decommissioned_bricks[i] == this) { + ret = -1; + goto out; + } } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } /*Get subvolume which has both space and inodes more than the min criteria*/ xlator_t * -dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, xlator_t *ignore, - dht_layout_t *layout, uint64_t filesize) +dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, + xlator_t *ignore, dht_layout_t *layout, + uint64_t filesize) { - int i = 0; - double max = 0; - double max_inodes = 0; - int ignore_subvol = 0; - uint64_t total_blocks = 0; - uint64_t avail_blocks = 0; - uint64_t frsize = 0; - double post_availspace = 0; - double post_percent = 0; - - xlator_t *avail_subvol = NULL; - dht_conf_t *conf = NULL; - - conf = this->private; - - for(i=0; i < conf->subvolume_cnt; i++) { - /* check if subvol has layout errors and also it is not a - * decommissioned brick, before selecting it */ - ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i], - ignore, layout); - if (ignore_subvol) - continue; - - if ((conf->disk_unit == 'p') && - (conf->du_stats[i].avail_percent > conf->min_free_disk) && - (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) { - if ((conf->du_stats[i].avail_inodes > max_inodes) || - (conf->du_stats[i].avail_percent > max)) { - max = conf->du_stats[i].avail_percent; - max_inodes = conf->du_stats[i].avail_inodes; - avail_subvol = conf->subvolumes[i]; - total_blocks = conf->du_stats[i].total_blocks; - avail_blocks = conf->du_stats[i].avail_blocks; - frsize = conf->du_stats[i].frsize; - } - } - - if ((conf->disk_unit != 'p') && - (conf->du_stats[i].avail_space > conf->min_free_disk) && - (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) { - if ((conf->du_stats[i].avail_inodes > max_inodes) || - (conf->du_stats[i].avail_space > max)) { - max = conf->du_stats[i].avail_space; - max_inodes = conf->du_stats[i].avail_inodes; - avail_subvol = conf->subvolumes[i]; - } - } + int i = 0; + double max = 0; + double max_inodes = 0; + int ignore_subvol = 0; + uint64_t total_blocks = 0; + uint64_t avail_blocks = 0; + uint64_t frsize = 0; + double post_availspace = 0; + double post_percent = 0; + + xlator_t *avail_subvol = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + /* check if subvol has layout errors and also it is not a + * decommissioned brick, before selecting it */ + ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], ignore, + layout); + if (ignore_subvol) + continue; + + if ((conf->disk_unit == 'p') && + (conf->du_stats[i].avail_percent > conf->min_free_disk) && + (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) { + if ((conf->du_stats[i].avail_inodes > max_inodes) || + (conf->du_stats[i].avail_percent > max)) { + 
max = conf->du_stats[i].avail_percent; + max_inodes = conf->du_stats[i].avail_inodes; + avail_subvol = conf->subvolumes[i]; + total_blocks = conf->du_stats[i].total_blocks; + avail_blocks = conf->du_stats[i].avail_blocks; + frsize = conf->du_stats[i].frsize; + } } - if (avail_subvol) { - if (conf->disk_unit == 'p') { - post_availspace = (avail_blocks * frsize) - filesize; - post_percent = (post_availspace * 100) / (total_blocks * frsize); - if (post_percent < conf->min_free_disk) - avail_subvol = NULL; - } - if (conf->disk_unit != 'p') { - if ((max - filesize) < conf->min_free_disk) - avail_subvol = NULL; - } + if ((conf->disk_unit != 'p') && + (conf->du_stats[i].avail_space > conf->min_free_disk) && + (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) { + if ((conf->du_stats[i].avail_inodes > max_inodes) || + (conf->du_stats[i].avail_space > max)) { + max = conf->du_stats[i].avail_space; + max_inodes = conf->du_stats[i].avail_inodes; + avail_subvol = conf->subvolumes[i]; + } } + } + + if (avail_subvol) { + if (conf->disk_unit == 'p') { + post_availspace = (avail_blocks * frsize) - filesize; + post_percent = (post_availspace * 100) / (total_blocks * frsize); + if (post_percent < conf->min_free_disk) + avail_subvol = NULL; + } + if (conf->disk_unit != 'p') { + if ((max - filesize) < conf->min_free_disk) + avail_subvol = NULL; + } + } - return avail_subvol; + return avail_subvol; } - /* Get subvol which has at least one inode and maximum space */ xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, - dht_layout_t *layout) +dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol, + dht_layout_t *layout) { - int i = 0; - double max = 0; - int ignore_subvol = 0; - - xlator_t *avail_subvol = NULL; - dht_conf_t *conf = NULL; - - conf = this->private; - - for (i = 0; i < conf->subvolume_cnt; i++) { - /* check if subvol has layout errors and also it is not a - * decommissioned brick, before selecting it*/ - - ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i], NULL, - layout); - if (ignore_subvol) - continue; - - if (conf->disk_unit == 'p') { - if ((conf->du_stats[i].avail_percent > max) - && (conf->du_stats[i].avail_inodes > 0 )) { - max = conf->du_stats[i].avail_percent; - avail_subvol = conf->subvolumes[i]; - } - } else { - if ((conf->du_stats[i].avail_space > max) - && (conf->du_stats[i].avail_inodes > 0)) { - max = conf->du_stats[i].avail_space; - avail_subvol = conf->subvolumes[i]; - } - } + int i = 0; + double max = 0; + int ignore_subvol = 0; + + xlator_t *avail_subvol = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + /* check if subvol has layout errors and also it is not a + * decommissioned brick, before selecting it*/ + + ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], NULL, + layout); + if (ignore_subvol) + continue; + + if (conf->disk_unit == 'p') { + if ((conf->du_stats[i].avail_percent > max) && + (conf->du_stats[i].avail_inodes > 0)) { + max = conf->du_stats[i].avail_percent; + avail_subvol = conf->subvolumes[i]; + } + } else { + if ((conf->du_stats[i].avail_space > max) && + (conf->du_stats[i].avail_inodes > 0)) { + max = conf->du_stats[i].avail_space; + avail_subvol = conf->subvolumes[i]; + } } + } - return avail_subvol; + return avail_subvol; } diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c index f8e614a40aa..2f15c0370cc 100644 --- a/xlators/cluster/dht/src/dht-hashfn.c +++ 
b/xlators/cluster/dht/src/dht-hashfn.c @@ -8,104 +8,98 @@ cases as published by the Free Software Foundation. */ - #include "glusterfs.h" #include "xlator.h" #include "dht-common.h" #include "hashfn.h" - int -dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p) +dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p) { - int ret = 0; - uint32_t hash = 0; + int ret = 0; + uint32_t hash = 0; - switch (type) { + switch (type) { case DHT_HASH_TYPE_DM: case DHT_HASH_TYPE_DM_USER: - hash = gf_dm_hashfn (name, strlen (name)); - break; + hash = gf_dm_hashfn(name, strlen(name)); + break; default: - ret = -1; - break; - } + ret = -1; + break; + } - if (ret == 0) { - *hash_p = hash; - } + if (ret == 0) { + *hash_p = hash; + } - return ret; + return ret; } - -static -gf_boolean_t -dht_munge_name (const char *original, char *modified, - size_t len, regex_t *re) +static gf_boolean_t +dht_munge_name(const char *original, char *modified, size_t len, regex_t *re) { - regmatch_t matches[2] = {{0}, }; - size_t new_len = 0; - int ret = 0; - - ret = regexec(re, original, 2, matches, 0); - - if (ret != REG_NOMATCH) { - if (matches[1].rm_so != -1) { - new_len = matches[1].rm_eo - matches[1].rm_so; - /* Equal would fail due to the NUL at the end. */ - if (new_len < len) { - memcpy (modified,original+matches[1].rm_so, - new_len); - modified[new_len] = '\0'; - return _gf_true; - } - } + regmatch_t matches[2] = { + {0}, + }; + size_t new_len = 0; + int ret = 0; + + ret = regexec(re, original, 2, matches, 0); + + if (ret != REG_NOMATCH) { + if (matches[1].rm_so != -1) { + new_len = matches[1].rm_eo - matches[1].rm_so; + /* Equal would fail due to the NUL at the end. */ + if (new_len < len) { + memcpy(modified, original + matches[1].rm_so, new_len); + modified[new_len] = '\0'; + return _gf_true; + } } + } - /* This is guaranteed safe because of how the dest was allocated. */ - strcpy(modified, original); - return _gf_false; + /* This is guaranteed safe because of how the dest was allocated. 
*/ + strcpy(modified, original); + return _gf_false; } int -dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p) +dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p) { - char *rsync_friendly_name = NULL; - dht_conf_t *priv = NULL; - size_t len = 0; - gf_boolean_t munged = _gf_false; - - priv = this->private; - - LOCK (&priv->lock); - { - if (priv->extra_regex_valid) { - len = strlen(name) + 1; - rsync_friendly_name = alloca(len); - munged = dht_munge_name (name, rsync_friendly_name, len, - &priv->extra_regex); - } - - if (!munged && priv->rsync_regex_valid) { - len = strlen(name) + 1; - rsync_friendly_name = alloca(len); - gf_msg_trace (this->name, 0, "trying regex for %s", - name); - munged = dht_munge_name (name, rsync_friendly_name, len, - &priv->rsync_regex); - if (munged) { - gf_msg_debug (this->name, 0, - "munged down to %s", - rsync_friendly_name); - } - } + char *rsync_friendly_name = NULL; + dht_conf_t *priv = NULL; + size_t len = 0; + gf_boolean_t munged = _gf_false; + + priv = this->private; + + LOCK(&priv->lock); + { + if (priv->extra_regex_valid) { + len = strlen(name) + 1; + rsync_friendly_name = alloca(len); + munged = dht_munge_name(name, rsync_friendly_name, len, + &priv->extra_regex); } - UNLOCK (&priv->lock); - if (!munged) { - rsync_friendly_name = (char *)name; + if (!munged && priv->rsync_regex_valid) { + len = strlen(name) + 1; + rsync_friendly_name = alloca(len); + gf_msg_trace(this->name, 0, "trying regex for %s", name); + munged = dht_munge_name(name, rsync_friendly_name, len, + &priv->rsync_regex); + if (munged) { + gf_msg_debug(this->name, 0, "munged down to %s", + rsync_friendly_name); + } } + } + UNLOCK(&priv->lock); + + if (!munged) { + rsync_friendly_name = (char *)name; + } - return dht_hash_compute_internal (type, rsync_friendly_name, hash_p); + return dht_hash_compute_internal(type, rsync_friendly_name, hash_p); } diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 403f0a0f514..6d6ec24729d 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -8,266 +8,249 @@ cases as published by the Free Software Foundation. 
*/ - #include "glusterfs.h" #include "xlator.h" #include "dht-common.h" #include "dht-lock.h" static void -dht_free_fd_ctx (dht_fd_ctx_t *fd_ctx) +dht_free_fd_ctx(dht_fd_ctx_t *fd_ctx) { - GF_FREE (fd_ctx); + GF_FREE(fd_ctx); } - int32_t -dht_fd_ctx_destroy (xlator_t *this, fd_t *fd) +dht_fd_ctx_destroy(xlator_t *this, fd_t *fd) { - dht_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int32_t ret = -1; + dht_fd_ctx_t *fd_ctx = NULL; + uint64_t value = 0; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - ret = fd_ctx_del (fd, this, &value); - if (ret) { - goto out; - } + ret = fd_ctx_del(fd, this, &value); + if (ret) { + goto out; + } - fd_ctx = (dht_fd_ctx_t *)value; - if (fd_ctx) { - GF_REF_PUT (fd_ctx); - } + fd_ctx = (dht_fd_ctx_t *)value; + if (fd_ctx) { + GF_REF_PUT(fd_ctx); + } out: - return ret; + return ret; } - static int -__dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *dst) +__dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst) { - dht_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int ret = -1; + dht_fd_ctx_t *fd_ctx = NULL; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx), gf_dht_mt_fd_ctx_t); + fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_dht_mt_fd_ctx_t); - if (!fd_ctx) { - goto out; - } + if (!fd_ctx) { + goto out; + } - fd_ctx->opened_on_dst = (uint64_t) dst; - GF_REF_INIT (fd_ctx, dht_free_fd_ctx); + fd_ctx->opened_on_dst = (uint64_t)dst; + GF_REF_INIT(fd_ctx, dht_free_fd_ctx); - value = (uint64_t) fd_ctx; + value = (uint64_t)fd_ctx; - ret = __fd_ctx_set (fd, this, value); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_FD_CTX_SET_FAILED, - "Failed to set fd ctx in fd=0x%p", fd); - GF_REF_PUT (fd_ctx); - } + ret = __fd_ctx_set(fd, this, value); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED, + "Failed to set fd ctx in fd=0x%p", fd); + GF_REF_PUT(fd_ctx); + } out: - return ret; + return ret; } - - int -dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *dst) +dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst) { - dht_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - LOCK (&fd->lock); - { - ret = __fd_ctx_get (fd, this, &value); - if (ret && value) { - - fd_ctx = (dht_fd_ctx_t *) value; - if (fd_ctx->opened_on_dst == (uint64_t) dst) { - /* This could happen due to racing - * check_progress tasks*/ - goto unlock; - } else { - /* This would be a big problem*/ - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_INVALID_VALUE, - "Different dst found in the fd ctx"); - - /* Overwrite and hope for the best*/ - fd_ctx->opened_on_dst = (uint64_t)dst; - goto unlock; - } + dht_fd_ctx_t *fd_ctx = NULL; + uint64_t value = 0; + int ret = -1; + + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + LOCK(&fd->lock); + { + ret = __fd_ctx_get(fd, this, &value); + if (ret && value) { + fd_ctx = (dht_fd_ctx_t *)value; + if (fd_ctx->opened_on_dst == (uint64_t)dst) { + /* This could happen due to racing + * check_progress tasks*/ + goto unlock; + } else { + /* This would be a big problem*/ + gf_msg(this->name, GF_LOG_WARNING, 0, 
DHT_MSG_INVALID_VALUE, + "Different dst found in the fd ctx"); - } - ret = __dht_fd_ctx_set (this, fd, dst); + /* Overwrite and hope for the best*/ + fd_ctx->opened_on_dst = (uint64_t)dst; + goto unlock; + } } + ret = __dht_fd_ctx_set(this, fd, dst); + } unlock: - UNLOCK (&fd->lock); + UNLOCK(&fd->lock); out: - return ret; + return ret; } - - -static -dht_fd_ctx_t * -dht_fd_ctx_get (xlator_t *this, fd_t *fd) +static dht_fd_ctx_t * +dht_fd_ctx_get(xlator_t *this, fd_t *fd) { - dht_fd_ctx_t *fd_ctx = NULL; - int ret = -1; - uint64_t tmp_val = 0; - - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - LOCK (&fd->lock); - { - ret = __fd_ctx_get (fd, this, &tmp_val); - if ((ret < 0) || (tmp_val == 0)) { - UNLOCK (&fd->lock); - goto out; - } + dht_fd_ctx_t *fd_ctx = NULL; + int ret = -1; + uint64_t tmp_val = 0; + + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - fd_ctx = (dht_fd_ctx_t *)tmp_val; - GF_REF_GET (fd_ctx); + LOCK(&fd->lock); + { + ret = __fd_ctx_get(fd, this, &tmp_val); + if ((ret < 0) || (tmp_val == 0)) { + UNLOCK(&fd->lock); + goto out; } - UNLOCK (&fd->lock); + + fd_ctx = (dht_fd_ctx_t *)tmp_val; + GF_REF_GET(fd_ctx); + } + UNLOCK(&fd->lock); out: - return fd_ctx; + return fd_ctx; } gf_boolean_t -dht_fd_open_on_dst (xlator_t *this, fd_t *fd, xlator_t *dst) +dht_fd_open_on_dst(xlator_t *this, fd_t *fd, xlator_t *dst) { - dht_fd_ctx_t *fd_ctx = NULL; - gf_boolean_t opened = _gf_false; + dht_fd_ctx_t *fd_ctx = NULL; + gf_boolean_t opened = _gf_false; - fd_ctx = dht_fd_ctx_get (this, fd); + fd_ctx = dht_fd_ctx_get(this, fd); - if (fd_ctx) { - if (fd_ctx->opened_on_dst == (uint64_t) dst) { - opened = _gf_true; - } - GF_REF_PUT (fd_ctx); + if (fd_ctx) { + if (fd_ctx->opened_on_dst == (uint64_t)dst) { + opened = _gf_true; } + GF_REF_PUT(fd_ctx); + } - return opened; + return opened; } - void -dht_free_mig_info (void *data) +dht_free_mig_info(void *data) { - dht_migrate_info_t *miginfo = NULL; + dht_migrate_info_t *miginfo = NULL; - miginfo = data; - GF_FREE (miginfo); + miginfo = data; + GF_FREE(miginfo); - return; + return; } static int -dht_inode_ctx_set_mig_info (xlator_t *this, inode_t *inode, - xlator_t *src_subvol, xlator_t *dst_subvol) +dht_inode_ctx_set_mig_info(xlator_t *this, inode_t *inode, xlator_t *src_subvol, + xlator_t *dst_subvol) { - dht_migrate_info_t *miginfo = NULL; - uint64_t value = 0; - int ret = -1; + dht_migrate_info_t *miginfo = NULL; + uint64_t value = 0; + int ret = -1; - miginfo = GF_CALLOC (1, sizeof (*miginfo), gf_dht_mt_miginfo_t); - if (miginfo == NULL) - goto out; + miginfo = GF_CALLOC(1, sizeof(*miginfo), gf_dht_mt_miginfo_t); + if (miginfo == NULL) + goto out; - miginfo->src_subvol = src_subvol; - miginfo->dst_subvol = dst_subvol; - GF_REF_INIT (miginfo, dht_free_mig_info); + miginfo->src_subvol = src_subvol; + miginfo->dst_subvol = dst_subvol; + GF_REF_INIT(miginfo, dht_free_mig_info); - value = (uint64_t) miginfo; + value = (uint64_t)miginfo; - ret = inode_ctx_set1 (inode, this, &value); - if (ret < 0) { - GF_REF_PUT (miginfo); - } + ret = inode_ctx_set1(inode, this, &value); + if (ret < 0) { + GF_REF_PUT(miginfo); + } out: - return ret; + return ret; } - int -dht_inode_ctx_get_mig_info (xlator_t *this, inode_t *inode, - xlator_t **src_subvol, xlator_t **dst_subvol) +dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode, + xlator_t **src_subvol, xlator_t **dst_subvol) { - int ret = -1; - uint64_t tmp_miginfo = 0; - dht_migrate_info_t *miginfo = NULL; - - LOCK 
(&inode->lock); - { - ret = __inode_ctx_get1 (inode, this, &tmp_miginfo); - if ((ret < 0) || (tmp_miginfo == 0)) { - UNLOCK (&inode->lock); - goto out; - } + int ret = -1; + uint64_t tmp_miginfo = 0; + dht_migrate_info_t *miginfo = NULL; - miginfo = (dht_migrate_info_t *)tmp_miginfo; - GF_REF_GET (miginfo); + LOCK(&inode->lock); + { + ret = __inode_ctx_get1(inode, this, &tmp_miginfo); + if ((ret < 0) || (tmp_miginfo == 0)) { + UNLOCK(&inode->lock); + goto out; } - UNLOCK (&inode->lock); - if (src_subvol) - *src_subvol = miginfo->src_subvol; + miginfo = (dht_migrate_info_t *)tmp_miginfo; + GF_REF_GET(miginfo); + } + UNLOCK(&inode->lock); + + if (src_subvol) + *src_subvol = miginfo->src_subvol; - if (dst_subvol) - *dst_subvol = miginfo->dst_subvol; + if (dst_subvol) + *dst_subvol = miginfo->dst_subvol; - GF_REF_PUT (miginfo); + GF_REF_PUT(miginfo); out: - return ret; + return ret; } gf_boolean_t -dht_mig_info_is_invalid (xlator_t *current, xlator_t *src_subvol, - xlator_t *dst_subvol) +dht_mig_info_is_invalid(xlator_t *current, xlator_t *src_subvol, + xlator_t *dst_subvol) { - -/* Not set - */ - if (!src_subvol || !dst_subvol) - return _gf_true; - -/* Invalid scenarios: - * The src_subvol does not match the subvol on which the current op was sent - * so the cached subvol has changed between the last mig_info_set and now. - * src_subvol == dst_subvol. The file was migrated without any FOP detecting - * a P2 so the old dst is now the current subvol. - * - * There is still one scenario where the info could be outdated - if - * file has undergone multiple migrations and ends up on the same src_subvol - * on which the mig_info was first set. - */ - if ((current == dst_subvol) || (current != src_subvol)) - return _gf_true; - - return _gf_false; + /* Not set + */ + if (!src_subvol || !dst_subvol) + return _gf_true; + + /* Invalid scenarios: + * The src_subvol does not match the subvol on which the current op was sent + * so the cached subvol has changed between the last mig_info_set and now. + * src_subvol == dst_subvol. The file was migrated without any FOP detecting + * a P2 so the old dst is now the current subvol. + * + * There is still one scenario where the info could be outdated - if + * file has undergone multiple migrations and ends up on the same src_subvol + * on which the mig_info was first set. + */ + if ((current == dst_subvol) || (current != src_subvol)) + return _gf_true; + + return _gf_false; } - - /* Used to check if fd fops have the fd opened on the cached subvol * This is required when: * 1. 
an fd is opened on FILE1 on subvol1 @@ -279,337 +262,304 @@ dht_mig_info_is_invalid (xlator_t *current, xlator_t *src_subvol, * */ - int -dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame, - void *data) +dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame, + void *data) { - glusterfs_fop_t fop = 0; - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - xlator_t *this = NULL; - fd_t *fd = NULL; - int op_errno = -1; - - local = frame->local; - this = frame->this; - fop = local->fop; - subvol = local->cached_subvol; - fd = local->fd; - - if (ret) { - op_errno = local->op_errno; - goto handle_err; - } - - switch (fop) { - + glusterfs_fop_t fop = 0; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + xlator_t *this = NULL; + fd_t *fd = NULL; + int op_errno = -1; + + local = frame->local; + this = frame->this; + fop = local->fop; + subvol = local->cached_subvol; + fd = local->fd; + + if (ret) { + op_errno = local->op_errno; + goto handle_err; + } + + switch (fop) { case GF_FOP_WRITE: - STACK_WIND_COOKIE (frame, dht_writev_cbk, subvol, subvol, - subvol->fops->writev, fd, - local->rebalance.vector, - local->rebalance.count, - local->rebalance.offset, - local->rebalance.flags, - local->rebalance.iobref, local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol, + subvol->fops->writev, fd, local->rebalance.vector, + local->rebalance.count, local->rebalance.offset, + local->rebalance.flags, local->rebalance.iobref, + local->xattr_req); + break; case GF_FOP_FLUSH: - STACK_WIND (frame, dht_flush_cbk, subvol, - subvol->fops->flush, fd, local->xattr_req); - break; + STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd, + local->xattr_req); + break; case GF_FOP_FSETATTR: - STACK_WIND_COOKIE (frame, dht_file_setattr_cbk, subvol, - subvol, subvol->fops->fsetattr, fd, - &local->rebalance.stbuf, - local->rebalance.flags, - local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol, + subvol->fops->fsetattr, fd, + &local->rebalance.stbuf, local->rebalance.flags, + local->xattr_req); + break; case GF_FOP_ZEROFILL: - STACK_WIND_COOKIE (frame, dht_zerofill_cbk, subvol, subvol, - subvol->fops->zerofill, fd, - local->rebalance.offset, - local->rebalance.size, local->xattr_req); + STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol, + subvol->fops->zerofill, fd, + local->rebalance.offset, local->rebalance.size, + local->xattr_req); - break; + break; case GF_FOP_DISCARD: - STACK_WIND_COOKIE (frame, dht_discard_cbk, subvol, subvol, - subvol->fops->discard, local->fd, - local->rebalance.offset, - local->rebalance.size, - local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol, + subvol->fops->discard, local->fd, + local->rebalance.offset, local->rebalance.size, + local->xattr_req); + break; case GF_FOP_FALLOCATE: - STACK_WIND_COOKIE (frame, dht_fallocate_cbk, subvol, subvol, - subvol->fops->fallocate, fd, - local->rebalance.flags, - local->rebalance.offset, - local->rebalance.size, - local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol, + subvol->fops->fallocate, fd, + local->rebalance.flags, local->rebalance.offset, + local->rebalance.size, local->xattr_req); + break; case GF_FOP_FTRUNCATE: - STACK_WIND_COOKIE (frame, dht_truncate_cbk, subvol, subvol, - subvol->fops->ftruncate, fd, - local->rebalance.offset, local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol, + subvol->fops->ftruncate, fd, + 
local->rebalance.offset, local->xattr_req); + break; case GF_FOP_FSYNC: - STACK_WIND_COOKIE (frame, dht_fsync_cbk, subvol, subvol, - subvol->fops->fsync, local->fd, - local->rebalance.flags, local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, + subvol->fops->fsync, local->fd, + local->rebalance.flags, local->xattr_req); + break; case GF_FOP_READ: - STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv, - local->fd, local->rebalance.size, - local->rebalance.offset, - local->rebalance.flags, local->xattr_req); - break; + STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, + local->fd, local->rebalance.size, + local->rebalance.offset, local->rebalance.flags, + local->xattr_req); + break; case GF_FOP_FSTAT: - STACK_WIND_COOKIE (frame, dht_file_attr_cbk, subvol, - subvol, subvol->fops->fstat, fd, - local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol, + subvol->fops->fstat, fd, local->xattr_req); + break; case GF_FOP_FSETXATTR: - STACK_WIND_COOKIE (frame, dht_file_setxattr_cbk, subvol, - subvol, subvol->fops->fsetxattr, local->fd, - local->rebalance.xattr, - local->rebalance.flags, local->xattr_req); - break; + STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol, + subvol->fops->fsetxattr, local->fd, + local->rebalance.xattr, local->rebalance.flags, + local->xattr_req); + break; case GF_FOP_FREMOVEXATTR: - STACK_WIND_COOKIE (frame, dht_file_removexattr_cbk, subvol, - subvol, subvol->fops->fremovexattr, - local->fd, local->key, local->xattr_req); + STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol, + subvol->fops->fremovexattr, local->fd, local->key, + local->xattr_req); - break; + break; default: - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_UNKNOWN_FOP, - "Unknown FOP on fd (%p) on file %s @ %s", - fd, uuid_utoa (fd->inode->gfid), - subvol->name); - break; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", fd, + uuid_utoa(fd->inode->gfid), subvol->name); + break; + } - } + goto out; - goto out; - - /* Could not open the fd on the dst. Unwind */ + /* Could not open the fd on the dst. 
Unwind */ handle_err: - switch (fop) { - + switch (fop) { case GF_FOP_WRITE: - DHT_STACK_UNWIND (writev, frame, -1, - op_errno, NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); + break; case GF_FOP_FLUSH: - DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL); - break; + DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL); + break; case GF_FOP_FSETATTR: - DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, - NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); + break; case GF_FOP_ZEROFILL: - DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, - NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); + break; case GF_FOP_DISCARD: - DHT_STACK_UNWIND (discard, frame, -1, op_errno, - NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); + break; case GF_FOP_FALLOCATE: - DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, - NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); + break; case GF_FOP_FTRUNCATE: - DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, - NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); + break; case GF_FOP_FSYNC: - DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL); + break; case GF_FOP_READ: - DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, - 0, NULL, NULL, NULL); - break; + DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, + NULL); + break; case GF_FOP_FSTAT: - DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); - break; + DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL); + break; case GF_FOP_FSETXATTR: - DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - break; + DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL); + break; case GF_FOP_FREMOVEXATTR: - DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL); - break; + DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL); + break; default: - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_UNKNOWN_FOP, - "Unknown FOP on fd (%p) on file %s @ %s", - fd, uuid_utoa (fd->inode->gfid), - subvol->name); - break; - } + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", fd, + uuid_utoa(fd->inode->gfid), subvol->name); + break; + } out: - return 0; - + return 0; } - /* Check once again if the fd has been opened on the cached subvol. * If not, open and update the fd_ctx. 
*/ int -dht_check_and_open_fd_on_subvol_task (void *data) +dht_check_and_open_fd_on_subvol_task(void *data) { - loc_t loc = {0,}; - int ret = -1; - call_frame_t *frame = NULL; - dht_local_t *local = NULL; - fd_t *fd = NULL; - xlator_t *this = NULL; - xlator_t *subvol = NULL; - - - frame = data; - local = frame->local; - this = THIS; - fd = local->fd; - subvol = local->cached_subvol; - - local->fd_checked = _gf_true; - - if (fd_is_anonymous (fd) || dht_fd_open_on_dst (this, fd, subvol)) { - ret = 0; - goto out; - } - - gf_msg_debug (this->name, 0, - "Opening fd (%p, flags=0%o) on file %s @ %s", - fd, fd->flags, uuid_utoa (fd->inode->gfid), - subvol->name); + loc_t loc = { + 0, + }; + int ret = -1; + call_frame_t *frame = NULL; + dht_local_t *local = NULL; + fd_t *fd = NULL; + xlator_t *this = NULL; + xlator_t *subvol = NULL; + + frame = data; + local = frame->local; + this = THIS; + fd = local->fd; + subvol = local->cached_subvol; + + local->fd_checked = _gf_true; + + if (fd_is_anonymous(fd) || dht_fd_open_on_dst(this, fd, subvol)) { + ret = 0; + goto out; + } + gf_msg_debug(this->name, 0, "Opening fd (%p, flags=0%o) on file %s @ %s", + fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name); - loc.inode = inode_ref (fd->inode); - gf_uuid_copy (loc.gfid, fd->inode->gfid); + loc.inode = inode_ref(fd->inode); + gf_uuid_copy(loc.gfid, fd->inode->gfid); - /* Open this on the dst subvol */ + /* Open this on the dst subvol */ - SYNCTASK_SETID(0, 0); + SYNCTASK_SETID(0, 0); - ret = syncop_open (subvol, &loc, - (fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)), - fd, NULL, NULL); + ret = syncop_open(subvol, &loc, (fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)), + fd, NULL, NULL); - if (ret < 0) { - - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_OPEN_FD_ON_DST_FAILED, - "Failed to open the fd" - " (%p, flags=0%o) on file %s @ %s", - fd, fd->flags, uuid_utoa (fd->inode->gfid), - subvol->name); - /* This can happen if the cached subvol was updated in the - * inode_ctx and the fd was opened on the new cached suvol - * after this fop was wound on the old cached subvol. - * As we do not close the fd on the old subvol (a leak) - * don't treat ENOENT as an error and allow the phase1/phase2 - * checks to handle it. - */ - - if ((-ret != ENOENT) && (-ret != ESTALE)) { - local->op_errno = -ret; - ret = -1; - } else { - ret = 0; - } - - local->op_errno = -ret; - ret = -1; + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED, + "Failed to open the fd" + " (%p, flags=0%o) on file %s @ %s", + fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name); + /* This can happen if the cached subvol was updated in the + * inode_ctx and the fd was opened on the new cached suvol + * after this fop was wound on the old cached subvol. + * As we do not close the fd on the old subvol (a leak) + * don't treat ENOENT as an error and allow the phase1/phase2 + * checks to handle it. 
+ */ + if ((-ret != ENOENT) && (-ret != ESTALE)) { + local->op_errno = -ret; + ret = -1; } else { - dht_fd_ctx_set (this, fd, subvol); + ret = 0; } - SYNCTASK_SETID (frame->root->uid, frame->root->gid); + local->op_errno = -ret; + ret = -1; + + } else { + dht_fd_ctx_set(this, fd, subvol); + } + + SYNCTASK_SETID(frame->root->uid, frame->root->gid); out: - loc_wipe (&loc); + loc_wipe(&loc); - return ret; + return ret; } - int -dht_check_and_open_fd_on_subvol (xlator_t *this, call_frame_t *frame) +dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame) { - int ret = -1; - dht_local_t *local = NULL; - -/* - if (dht_fd_open_on_dst (this, fd, subvol)) - goto out; -*/ - local = frame->local; - - ret = synctask_new (this->ctx->env, - dht_check_and_open_fd_on_subvol_task, - dht_check_and_open_fd_on_subvol_complete, - frame, frame); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, - "Failed to create synctask" - " to check and open fd=%p", local->fd); - } - - return ret; + int ret = -1; + dht_local_t *local = NULL; + + /* + if (dht_fd_open_on_dst (this, fd, subvol)) + goto out; + */ + local = frame->local; + + ret = synctask_new(this->ctx->env, dht_check_and_open_fd_on_subvol_task, + dht_check_and_open_fd_on_subvol_complete, frame, frame); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Failed to create synctask" + " to check and open fd=%p", + local->fd); + } + + return ret; } - - int -dht_frame_return (call_frame_t *frame) +dht_frame_return(call_frame_t *frame) { - dht_local_t *local = NULL; - int this_call_cnt = -1; + dht_local_t *local = NULL; + int this_call_cnt = -1; - if (!frame) - return -1; + if (!frame) + return -1; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - this_call_cnt = --local->call_cnt; - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + this_call_cnt = --local->call_cnt; + } + UNLOCK(&frame->lock); - return this_call_cnt; + return this_call_cnt; } /* @@ -622,652 +572,636 @@ dht_frame_return (call_frame_t *frame) */ int -dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, - xlator_t **subvol) +dht_filter_loc_subvol_key(xlator_t *this, loc_t *loc, loc_t *new_loc, + xlator_t **subvol) { - char *new_name = NULL; - char *new_path = NULL; - xlator_list_t *trav = NULL; - char key[1024] = {0,}; - int ret = 0; /* not found */ - int keylen = 0; - int name_len = 0; - int path_len = 0; - - /* Why do other tasks if first required 'char' itself is not there */ - if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@')) { - /* Skip the GF_FREE checks here */ - return ret; - } - - trav = this->children; - while (trav) { - keylen = snprintf (key, sizeof (key), "*@%s:%s", this->name, - trav->xlator->name); - /* Ignore '*' */ - keylen = keylen - 1; - if (fnmatch (key, loc->name, FNM_NOESCAPE) == 0) { - name_len = strlen (loc->name) - keylen; - new_name = GF_MALLOC(name_len + 1, - gf_common_mt_char); - if (!new_name) - goto out; - if (fnmatch (key, loc->path, FNM_NOESCAPE) == 0) { - path_len = strlen (loc->path) - keylen; - new_path = GF_MALLOC(path_len + 1, - gf_common_mt_char); - if (!new_path) - goto out; - snprintf (new_path, path_len + 1, - "%s", loc->path); - } - snprintf (new_name, name_len + 1, "%s", - loc->name); - - if (new_loc) { - new_loc->path = ((new_path) ? 
new_path: - gf_strdup (loc->path)); - new_loc->name = new_name; - new_loc->inode = inode_ref (loc->inode); - new_loc->parent = inode_ref (loc->parent); - } - *subvol = trav->xlator; - ret = 1; /* success */ - goto out; - } - trav = trav->next; - } -out: - if (!ret) { - /* !success */ - GF_FREE (new_path); - GF_FREE (new_name); - } + char *new_name = NULL; + char *new_path = NULL; + xlator_list_t *trav = NULL; + char key[1024] = { + 0, + }; + int ret = 0; /* not found */ + int keylen = 0; + int name_len = 0; + int path_len = 0; + + /* Why do other tasks if first required 'char' itself is not there */ + if (!new_loc || !loc || !loc->name || !strchr(loc->name, '@')) { + /* Skip the GF_FREE checks here */ return ret; + } + + trav = this->children; + while (trav) { + keylen = snprintf(key, sizeof(key), "*@%s:%s", this->name, + trav->xlator->name); + /* Ignore '*' */ + keylen = keylen - 1; + if (fnmatch(key, loc->name, FNM_NOESCAPE) == 0) { + name_len = strlen(loc->name) - keylen; + new_name = GF_MALLOC(name_len + 1, gf_common_mt_char); + if (!new_name) + goto out; + if (fnmatch(key, loc->path, FNM_NOESCAPE) == 0) { + path_len = strlen(loc->path) - keylen; + new_path = GF_MALLOC(path_len + 1, gf_common_mt_char); + if (!new_path) + goto out; + snprintf(new_path, path_len + 1, "%s", loc->path); + } + snprintf(new_name, name_len + 1, "%s", loc->name); + + if (new_loc) { + new_loc->path = ((new_path) ? new_path : gf_strdup(loc->path)); + new_loc->name = new_name; + new_loc->inode = inode_ref(loc->inode); + new_loc->parent = inode_ref(loc->parent); + } + *subvol = trav->xlator; + ret = 1; /* success */ + goto out; + } + trav = trav->next; + } +out: + if (!ret) { + /* !success */ + GF_FREE(new_path); + GF_FREE(new_name); + } + return ret; } static xlator_t * dht_get_subvol_from_id(xlator_t *this, int client_id) { - xlator_t *xl = NULL; - dht_conf_t *conf = NULL; - char *sid = NULL; - int32_t ret = -1; - - conf = this->private; - - ret = gf_asprintf(&sid, "%d", client_id); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_ASPRINTF_FAILED, "asprintf failed while " - "fetching subvol from the id"); - goto out; - } + xlator_t *xl = NULL; + dht_conf_t *conf = NULL; + char *sid = NULL; + int32_t ret = -1; + + conf = this->private; + + ret = gf_asprintf(&sid, "%d", client_id); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, + "asprintf failed while " + "fetching subvol from the id"); + goto out; + } - if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **) &xl)) - xl = NULL; + if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **)&xl)) + xl = NULL; - GF_FREE (sid); + GF_FREE(sid); out: - return xl; - + return xl; } int -dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p) +dht_deitransform(xlator_t *this, uint64_t y, xlator_t **subvol_p) { - int client_id = 0; - xlator_t *subvol = 0; - dht_conf_t *conf = NULL; + int client_id = 0; + xlator_t *subvol = 0; + dht_conf_t *conf = NULL; - if (!this->private) - return -1; + if (!this->private) + return -1; - conf = this->private; + conf = this->private; - client_id = gf_deitransform(this, y); + client_id = gf_deitransform(this, y); - subvol = dht_get_subvol_from_id(this, client_id); + subvol = dht_get_subvol_from_id(this, client_id); - if (!subvol) - subvol = conf->subvolumes[0]; + if (!subvol) + subvol = conf->subvolumes[0]; - if (subvol_p) - *subvol_p = subvol; + if (subvol_p) + *subvol_p = subvol; - return 0; + return 0; } void -dht_local_wipe (xlator_t *this, dht_local_t *local) 
+dht_local_wipe(xlator_t *this, dht_local_t *local) { - int i = 0; + int i = 0; - if (!local) - return; + if (!local) + return; - loc_wipe (&local->loc); - loc_wipe (&local->loc2); - loc_wipe (&local->loc2_copy); + loc_wipe(&local->loc); + loc_wipe(&local->loc2); + loc_wipe(&local->loc2_copy); - if (local->xattr) - dict_unref (local->xattr); + if (local->xattr) + dict_unref(local->xattr); - if (local->inode) - inode_unref (local->inode); + if (local->inode) + inode_unref(local->inode); - if (local->layout) { - dht_layout_unref (this, local->layout); - local->layout = NULL; - } + if (local->layout) { + dht_layout_unref(this, local->layout); + local->layout = NULL; + } - loc_wipe (&local->linkfile.loc); + loc_wipe(&local->linkfile.loc); - if (local->linkfile.xattr) - dict_unref (local->linkfile.xattr); + if (local->linkfile.xattr) + dict_unref(local->linkfile.xattr); - if (local->linkfile.inode) - inode_unref (local->linkfile.inode); + if (local->linkfile.inode) + inode_unref(local->linkfile.inode); - if (local->fd) { - fd_unref (local->fd); - local->fd = NULL; - } + if (local->fd) { + fd_unref(local->fd); + local->fd = NULL; + } - if (local->params) { - dict_unref (local->params); - local->params = NULL; - } + if (local->params) { + dict_unref(local->params); + local->params = NULL; + } - if (local->xattr_req) - dict_unref (local->xattr_req); - if (local->mds_xattr) - dict_unref (local->mds_xattr); - if (local->xdata) - dict_unref (local->xdata); + if (local->xattr_req) + dict_unref(local->xattr_req); + if (local->mds_xattr) + dict_unref(local->mds_xattr); + if (local->xdata) + dict_unref(local->xdata); - if (local->selfheal.layout) { - dht_layout_unref (this, local->selfheal.layout); - local->selfheal.layout = NULL; - } + if (local->selfheal.layout) { + dht_layout_unref(this, local->selfheal.layout); + local->selfheal.layout = NULL; + } - if (local->selfheal.refreshed_layout) { - dht_layout_unref (this, local->selfheal.refreshed_layout); - local->selfheal.refreshed_layout = NULL; - } + if (local->selfheal.refreshed_layout) { + dht_layout_unref(this, local->selfheal.refreshed_layout); + local->selfheal.refreshed_layout = NULL; + } - for (i = 0; i < 2; i++) { - dht_lock_array_free (local->lock[i].ns.parent_layout.locks, - local->lock[i].ns.parent_layout.lk_count); + for (i = 0; i < 2; i++) { + dht_lock_array_free(local->lock[i].ns.parent_layout.locks, + local->lock[i].ns.parent_layout.lk_count); - GF_FREE (local->lock[i].ns.parent_layout.locks); + GF_FREE(local->lock[i].ns.parent_layout.locks); - dht_lock_array_free (local->lock[i].ns.directory_ns.locks, - local->lock[i].ns.directory_ns.lk_count); - GF_FREE (local->lock[i].ns.directory_ns.locks); - } + dht_lock_array_free(local->lock[i].ns.directory_ns.locks, + local->lock[i].ns.directory_ns.lk_count); + GF_FREE(local->lock[i].ns.directory_ns.locks); + } - GF_FREE (local->key); + GF_FREE(local->key); - if (local->rebalance.xdata) - dict_unref (local->rebalance.xdata); + if (local->rebalance.xdata) + dict_unref(local->rebalance.xdata); - if (local->rebalance.xattr) - dict_unref (local->rebalance.xattr); + if (local->rebalance.xattr) + dict_unref(local->rebalance.xattr); - if (local->rebalance.dict) - dict_unref (local->rebalance.dict); + if (local->rebalance.dict) + dict_unref(local->rebalance.dict); - GF_FREE (local->rebalance.vector); + GF_FREE(local->rebalance.vector); - if (local->rebalance.iobref) - iobref_unref (local->rebalance.iobref); + if (local->rebalance.iobref) + iobref_unref(local->rebalance.iobref); - if (local->stub) { - 
call_stub_destroy (local->stub); - local->stub = NULL; - } + if (local->stub) { + call_stub_destroy(local->stub); + local->stub = NULL; + } - if (local->ret_cache) - GF_FREE (local->ret_cache); + if (local->ret_cache) + GF_FREE(local->ret_cache); - mem_put (local); + mem_put(local); } - dht_local_t * -dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop) +dht_local_init(call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop) { - dht_local_t *local = NULL; - inode_t *inode = NULL; - int ret = 0; + dht_local_t *local = NULL; + inode_t *inode = NULL; + int ret = 0; - local = mem_get0 (THIS->local_pool); - if (!local) - goto out; + local = mem_get0(THIS->local_pool); + if (!local) + goto out; - if (loc) { - ret = loc_copy (&local->loc, loc); - if (ret) - goto out; + if (loc) { + ret = loc_copy(&local->loc, loc); + if (ret) + goto out; - inode = loc->inode; - } + inode = loc->inode; + } - if (fd) { - local->fd = fd_ref (fd); - if (!inode) - inode = fd->inode; - } + if (fd) { + local->fd = fd_ref(fd); + if (!inode) + inode = fd->inode; + } - local->op_ret = -1; - local->op_errno = EUCLEAN; - local->fop = fop; + local->op_ret = -1; + local->op_errno = EUCLEAN; + local->fop = fop; - if (inode) { - local->layout = dht_layout_get (frame->this, inode); - local->cached_subvol = dht_subvol_get_cached (frame->this, - inode); - } + if (inode) { + local->layout = dht_layout_get(frame->this, inode); + local->cached_subvol = dht_subvol_get_cached(frame->this, inode); + } - frame->local = local; + frame->local = local; out: - if (ret) { - if (local) - mem_put (local); - local = NULL; - } - return local; + if (ret) { + if (local) + mem_put(local); + local = NULL; + } + return local; } xlator_t * -dht_first_up_subvol (xlator_t *this) +dht_first_up_subvol(xlator_t *this) { - dht_conf_t *conf = NULL; - xlator_t *child = NULL; - int i = 0; - time_t time = 0; + dht_conf_t *conf = NULL; + xlator_t *child = NULL; + int i = 0; + time_t time = 0; - conf = this->private; - if (!conf) - goto out; + conf = this->private; + if (!conf) + goto out; - LOCK (&conf->subvolume_lock); - { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvol_up_time[i]) { - if (!time) { - time = conf->subvol_up_time[i]; - child = conf->subvolumes[i]; - } else if (time > conf->subvol_up_time[i]) { - time = conf->subvol_up_time[i]; - child = conf->subvolumes[i]; - } - } + LOCK(&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvol_up_time[i]) { + if (!time) { + time = conf->subvol_up_time[i]; + child = conf->subvolumes[i]; + } else if (time > conf->subvol_up_time[i]) { + time = conf->subvol_up_time[i]; + child = conf->subvolumes[i]; } + } } - UNLOCK (&conf->subvolume_lock); + } + UNLOCK(&conf->subvolume_lock); out: - return child; + return child; } xlator_t * -dht_last_up_subvol (xlator_t *this) +dht_last_up_subvol(xlator_t *this) { - dht_conf_t *conf = NULL; - xlator_t *child = NULL; - int i = 0; + dht_conf_t *conf = NULL; + xlator_t *child = NULL; + int i = 0; - conf = this->private; - if (!conf) - goto out; + conf = this->private; + if (!conf) + goto out; - LOCK (&conf->subvolume_lock); - { - for (i = conf->subvolume_cnt-1; i >= 0; i--) { - if (conf->subvolume_status[i]) { - child = conf->subvolumes[i]; - break; - } - } + LOCK(&conf->subvolume_lock); + { + for (i = conf->subvolume_cnt - 1; i >= 0; i--) { + if (conf->subvolume_status[i]) { + child = conf->subvolumes[i]; + break; + } } - UNLOCK (&conf->subvolume_lock); + } + UNLOCK(&conf->subvolume_lock); out: - 
return child; + return child; } xlator_t * -dht_subvol_get_hashed (xlator_t *this, loc_t *loc) +dht_subvol_get_hashed(xlator_t *this, loc_t *loc) { - dht_layout_t *layout = NULL; - xlator_t *subvol = NULL; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); + methods = &(conf->methods); - if (__is_root_gfid (loc->gfid)) { - subvol = dht_first_up_subvol (this); - goto out; - } + if (__is_root_gfid(loc->gfid)) { + subvol = dht_first_up_subvol(this); + goto out; + } - GF_VALIDATE_OR_GOTO (this->name, loc->parent, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); + GF_VALIDATE_OR_GOTO(this->name, loc->parent, out); + GF_VALIDATE_OR_GOTO(this->name, loc->name, out); - layout = dht_layout_get (this, loc->parent); + layout = dht_layout_get(this, loc->parent); - if (!layout) { - gf_msg_debug (this->name, 0, - "Missing layout. path=%s, parent gfid =%s", - loc->path, uuid_utoa (loc->parent->gfid)); - goto out; - } + if (!layout) { + gf_msg_debug(this->name, 0, "Missing layout. path=%s, parent gfid =%s", + loc->path, uuid_utoa(loc->parent->gfid)); + goto out; + } - subvol = methods->layout_search (this, layout, loc->name); + subvol = methods->layout_search(this, layout, loc->name); - if (!subvol) { - gf_msg_debug (this->name, 0, - "No hashed subvolume for path=%s", - loc->path); - goto out; - } + if (!subvol) { + gf_msg_debug(this->name, 0, "No hashed subvolume for path=%s", + loc->path); + goto out; + } out: - if (layout) { - dht_layout_unref (this, layout); - } + if (layout) { + dht_layout_unref(this, layout); + } - return subvol; + return subvol; } - xlator_t * -dht_subvol_get_cached (xlator_t *this, inode_t *inode) +dht_subvol_get_cached(xlator_t *this, inode_t *inode) { - dht_layout_t *layout = NULL; - xlator_t *subvol = NULL; + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; - GF_VALIDATE_OR_GOTO (this->name, this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - layout = dht_layout_get (this, inode); + layout = dht_layout_get(this, inode); - if (!layout) { - goto out; - } + if (!layout) { + goto out; + } - subvol = layout->list[0].xlator; + subvol = layout->list[0].xlator; out: - if (layout) { - dht_layout_unref (this, layout); - } + if (layout) { + dht_layout_unref(this, layout); + } - return subvol; + return subvol; } - xlator_t * -dht_subvol_next (xlator_t *this, xlator_t *prev) +dht_subvol_next(xlator_t *this, xlator_t *prev) { - dht_conf_t *conf = NULL; - int i = 0; - xlator_t *next = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *next = NULL; - conf = this->private; - if (!conf) - goto out; + conf = this->private; + if (!conf) + goto out; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == prev) { - if ((i + 1) < conf->subvolume_cnt) - next = conf->subvolumes[i + 1]; - break; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev) { + if ((i + 1) < conf->subvolume_cnt) + next = conf->subvolumes[i + 1]; + break; } + } out: - return next; + return 
next; } /* This func wraps around, if prev is actually the last subvol. */ xlator_t * -dht_subvol_next_available (xlator_t *this, xlator_t *prev) +dht_subvol_next_available(xlator_t *this, xlator_t *prev) { - dht_conf_t *conf = NULL; - int i = 0; - xlator_t *next = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *next = NULL; - conf = this->private; - if (!conf) - goto out; + conf = this->private; + if (!conf) + goto out; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == prev) { - /* if prev is last in conf->subvolumes, then wrap - * around. - */ - if ((i + 1) < conf->subvolume_cnt) { - next = conf->subvolumes[i + 1]; - } else { - next = conf->subvolumes[0]; - } - break; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev) { + /* if prev is last in conf->subvolumes, then wrap + * around. + */ + if ((i + 1) < conf->subvolume_cnt) { + next = conf->subvolumes[i + 1]; + } else { + next = conf->subvolumes[0]; + } + break; } + } out: - return next; + return next; } int -dht_subvol_cnt (xlator_t *this, xlator_t *subvol) +dht_subvol_cnt(xlator_t *this, xlator_t *subvol) { - int i = 0; - int ret = -1; - dht_conf_t *conf = NULL; + int i = 0; + int ret = -1; + dht_conf_t *conf = NULL; - conf = this->private; - if (!conf) - goto out; + conf = this->private; + if (!conf) + goto out; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - ret = i; - break; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + ret = i; + break; } + } out: - return ret; + return ret; } +#define set_if_greater(a, b) \ + do { \ + if ((a) < (b)) \ + (a) = (b); \ + } while (0) -#define set_if_greater(a, b) do { \ - if ((a) < (b)) \ - (a) = (b); \ - } while (0) - - -#define set_if_greater_time(a, an, b, bn) do { \ - if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \ - (a) = (b); \ - (an) = (bn); \ - } \ - } while (0) \ - +#define set_if_greater_time(a, an, b, bn) \ + do { \ + if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) { \ + (a) = (b); \ + (an) = (bn); \ + } \ + } while (0) int -dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from) +dht_iatt_merge(xlator_t *this, struct iatt *to, struct iatt *from) { - if (!from || !to) - return 0; - - to->ia_dev = from->ia_dev; - - gf_uuid_copy (to->ia_gfid, from->ia_gfid); - - to->ia_ino = from->ia_ino; - to->ia_prot = from->ia_prot; - to->ia_type = from->ia_type; - to->ia_nlink = from->ia_nlink; - to->ia_rdev = from->ia_rdev; - to->ia_size += from->ia_size; - to->ia_blksize = from->ia_blksize; - to->ia_blocks += from->ia_blocks; - - if (IA_ISDIR (from->ia_type)) { - to->ia_blocks = DHT_DIR_STAT_BLOCKS; - to->ia_size = DHT_DIR_STAT_SIZE; - } - set_if_greater (to->ia_uid, from->ia_uid); - set_if_greater (to->ia_gid, from->ia_gid); - - set_if_greater_time(to->ia_atime, to->ia_atime_nsec, - from->ia_atime, from->ia_atime_nsec); - set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec, - from->ia_mtime, from->ia_mtime_nsec); - set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec, - from->ia_ctime, from->ia_ctime_nsec); - + if (!from || !to) return 0; + + to->ia_dev = from->ia_dev; + + gf_uuid_copy(to->ia_gfid, from->ia_gfid); + + to->ia_ino = from->ia_ino; + to->ia_prot = from->ia_prot; + to->ia_type = from->ia_type; + to->ia_nlink = from->ia_nlink; + to->ia_rdev = from->ia_rdev; + to->ia_size += from->ia_size; + to->ia_blksize = from->ia_blksize; + to->ia_blocks += from->ia_blocks; + + if (IA_ISDIR(from->ia_type)) { + to->ia_blocks = 
DHT_DIR_STAT_BLOCKS; + to->ia_size = DHT_DIR_STAT_SIZE; + } + set_if_greater(to->ia_uid, from->ia_uid); + set_if_greater(to->ia_gid, from->ia_gid); + + set_if_greater_time(to->ia_atime, to->ia_atime_nsec, from->ia_atime, + from->ia_atime_nsec); + set_if_greater_time(to->ia_mtime, to->ia_mtime_nsec, from->ia_mtime, + from->ia_mtime_nsec); + set_if_greater_time(to->ia_ctime, to->ia_ctime_nsec, from->ia_ctime, + from->ia_ctime_nsec); + + return 0; } int -dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name) +dht_build_child_loc(xlator_t *this, loc_t *child, loc_t *parent, char *name) { - if (!child) { - goto err; - } + if (!child) { + goto err; + } - if (strcmp (parent->path, "/") == 0) - gf_asprintf ((char **)&child->path, "/%s", name); - else - gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name); + if (strcmp(parent->path, "/") == 0) + gf_asprintf((char **)&child->path, "/%s", name); + else + gf_asprintf((char **)&child->path, "%s/%s", parent->path, name); - if (!child->path) { - goto err; - } + if (!child->path) { + goto err; + } - child->name = strrchr (child->path, '/'); - if (child->name) - child->name++; + child->name = strrchr(child->path, '/'); + if (child->name) + child->name++; - child->parent = inode_ref (parent->inode); - child->inode = inode_new (parent->inode->table); + child->parent = inode_ref(parent->inode); + child->inode = inode_new(parent->inode->table); - if (!child->inode) { - goto err; - } + if (!child->inode) { + goto err; + } - return 0; + return 0; err: - if (child) { - loc_wipe (child); - } - return -1; + if (child) { + loc_wipe(child); + } + return -1; } int -dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf) +dht_init_local_subvolumes(xlator_t *this, dht_conf_t *conf) { - xlator_list_t *subvols = NULL; - int cnt = 0; + xlator_list_t *subvols = NULL; + int cnt = 0; - if (!conf) - return -1; + if (!conf) + return -1; - for (subvols = this->children; subvols; subvols = subvols->next) - cnt++; + for (subvols = this->children; subvols; subvols = subvols->next) + cnt++; - conf->local_subvols = GF_CALLOC (cnt, sizeof (xlator_t *), - gf_dht_mt_xlator_t); + conf->local_subvols = GF_CALLOC(cnt, sizeof(xlator_t *), + gf_dht_mt_xlator_t); - /* FIX FIX : do this dynamically*/ - conf->local_nodeuuids = GF_CALLOC (cnt, - sizeof (subvol_nodeuuids_info_t), - gf_dht_nodeuuids_t); + /* FIX FIX : do this dynamically*/ + conf->local_nodeuuids = GF_CALLOC(cnt, sizeof(subvol_nodeuuids_info_t), + gf_dht_nodeuuids_t); - if (!conf->local_subvols || !conf->local_nodeuuids) { - return -1; - } + if (!conf->local_subvols || !conf->local_nodeuuids) { + return -1; + } - conf->local_subvols_cnt = 0; + conf->local_subvols_cnt = 0; - return 0; + return 0; } int -dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) +dht_init_subvolumes(xlator_t *this, dht_conf_t *conf) { - xlator_list_t *subvols = NULL; - int cnt = 0; + xlator_list_t *subvols = NULL; + int cnt = 0; - if (!conf) - return -1; + if (!conf) + return -1; - for (subvols = this->children; subvols; subvols = subvols->next) - cnt++; + for (subvols = this->children; subvols; subvols = subvols->next) + cnt++; - conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *), - gf_dht_mt_xlator_t); - if (!conf->subvolumes) { - return -1; - } - conf->subvolume_cnt = cnt; + conf->subvolumes = GF_CALLOC(cnt, sizeof(xlator_t *), gf_dht_mt_xlator_t); + if (!conf->subvolumes) { + return -1; + } + conf->subvolume_cnt = cnt; - conf->local_subvols_cnt = 0; + conf->local_subvols_cnt = 0; - 
dht_set_subvol_range(this); + dht_set_subvol_range(this); - cnt = 0; - for (subvols = this->children; subvols; subvols = subvols->next) - conf->subvolumes[cnt++] = subvols->xlator; + cnt = 0; + for (subvols = this->children; subvols; subvols = subvols->next) + conf->subvolumes[cnt++] = subvols->xlator; - conf->subvolume_status = GF_CALLOC (cnt, sizeof (char), - gf_dht_mt_char); - if (!conf->subvolume_status) { - return -1; - } + conf->subvolume_status = GF_CALLOC(cnt, sizeof(char), gf_dht_mt_char); + if (!conf->subvolume_status) { + return -1; + } - conf->last_event = GF_CALLOC (cnt, sizeof (int), - gf_dht_mt_char); - if (!conf->last_event) { - return -1; - } + conf->last_event = GF_CALLOC(cnt, sizeof(int), gf_dht_mt_char); + if (!conf->last_event) { + return -1; + } - conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t), - gf_dht_mt_subvol_time); - if (!conf->subvol_up_time) { - return -1; - } + conf->subvol_up_time = GF_CALLOC(cnt, sizeof(time_t), + gf_dht_mt_subvol_time); + if (!conf->subvol_up_time) { + return -1; + } - conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t), - gf_dht_mt_dht_du_t); - if (!conf->du_stats) { - return -1; - } + conf->du_stats = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_du_t), + gf_dht_mt_dht_du_t); + if (!conf->du_stats) { + return -1; + } - conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *), - gf_dht_mt_xlator_t); - if (!conf->decommissioned_bricks) { - return -1; - } + conf->decommissioned_bricks = GF_CALLOC(cnt, sizeof(xlator_t *), + gf_dht_mt_xlator_t); + if (!conf->decommissioned_bricks) { + return -1; + } - return 0; + return 0; } - /* op_ret values : 0 : Success. @@ -1276,270 +1210,263 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) */ static int -dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data) +dht_migration_complete_check_done(int op_ret, call_frame_t *frame, void *data) { - dht_local_t *local = NULL; - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; - local = frame->local; + local = frame->local; - if (op_ret != 0) - goto out; + if (op_ret != 0) + goto out; - if (local->cached_subvol == NULL) { - local->op_errno = EINVAL; - goto out; - } + if (local->cached_subvol == NULL) { + local->op_errno = EINVAL; + goto out; + } - subvol = local->cached_subvol; + subvol = local->cached_subvol; out: - local->rebalance.target_op_fn (THIS, subvol, frame, op_ret); + local->rebalance.target_op_fn(THIS, subvol, frame, op_ret); - return 0; + return 0; } - int -dht_migration_complete_check_task (void *data) +dht_migration_complete_check_task(void *data) { - int ret = -1; - xlator_t *src_node = NULL; - xlator_t *dst_node = NULL, *linkto_target = NULL; - dht_local_t *local = NULL; - dict_t *dict = NULL; - struct iatt stbuf = {0,}; - xlator_t *this = NULL; - call_frame_t *frame = NULL; - loc_t tmp_loc = {0,}; - char *path = NULL; - dht_conf_t *conf = NULL; - inode_t *inode = NULL; - fd_t *iter_fd = NULL; - fd_t *tmp = NULL; - uint64_t tmp_miginfo = 0; - dht_migrate_info_t *miginfo = NULL; - int open_failed = 0; - - this = THIS; - frame = data; - local = frame->local; - conf = this->private; - - src_node = local->cached_subvol; - - if (!local->loc.inode && !local->fd) { - local->op_errno = EINVAL; - goto out; - } - - inode = (!local->fd) ? local->loc.inode : local->fd->inode; - - /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr - * as root:root. 
If a fd is already open, access check won't be done*/ - - if (!local->loc.inode) { - ret = syncop_fgetxattr (src_node, local->fd, &dict, - conf->link_xattr_name, NULL, NULL); - } else { - SYNCTASK_SETID (0, 0); - ret = syncop_getxattr (src_node, &local->loc, &dict, - conf->link_xattr_name, NULL, NULL); - SYNCTASK_SETID (frame->root->uid, frame->root->gid); - } - - - /* - * Each DHT xlator layer has its own name for the linkto xattr. - * If the file mode bits indicate the the file is being migrated but - * this layer's linkto xattr is not set, it means that another - * DHT layer is migrating the file. In this case, return 1 so - * the mode bits can be passed on to the higher layer for appropriate - * action. - */ - if (-ret == ENODATA) { - /* This DHT translator is not migrating this file */ - - ret = inode_ctx_reset1 (inode, this, &tmp_miginfo); - if (tmp_miginfo) { - - /* This can be a problem if the file was - * migrated by two different layers. Raise - * a warning here. - */ - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_HAS_MIGINFO, - "%s: Found miginfo in the inode ctx", - tmp_loc.path ? tmp_loc.path : - uuid_utoa (tmp_loc.gfid)); - - miginfo = (void *)tmp_miginfo; - GF_REF_PUT (miginfo); - } - ret = 1; - goto out; - } + int ret = -1; + xlator_t *src_node = NULL; + xlator_t *dst_node = NULL, *linkto_target = NULL; + dht_local_t *local = NULL; + dict_t *dict = NULL; + struct iatt stbuf = { + 0, + }; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + loc_t tmp_loc = { + 0, + }; + char *path = NULL; + dht_conf_t *conf = NULL; + inode_t *inode = NULL; + fd_t *iter_fd = NULL; + fd_t *tmp = NULL; + uint64_t tmp_miginfo = 0; + dht_migrate_info_t *miginfo = NULL; + int open_failed = 0; + + this = THIS; + frame = data; + local = frame->local; + conf = this->private; + + src_node = local->cached_subvol; + + if (!local->loc.inode && !local->fd) { + local->op_errno = EINVAL; + goto out; + } - if (!ret) - linkto_target = dht_linkfile_subvol (this, NULL, NULL, dict); + inode = (!local->fd) ? local->loc.inode : local->fd->inode; - if (local->loc.inode) { - loc_copy (&tmp_loc, &local->loc); - } else { - tmp_loc.inode = inode_ref (inode); - gf_uuid_copy (tmp_loc.gfid, inode->gfid); - } + /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr + * as root:root. If a fd is already open, access check won't be done*/ - ret = syncop_lookup (this, &tmp_loc, &stbuf, 0, 0, 0); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_FILE_LOOKUP_FAILED, - "%s: failed to lookup the file on %s", - tmp_loc.path ? tmp_loc.path : uuid_utoa (tmp_loc.gfid), - this->name); - local->op_errno = -ret; - ret = -1; - goto out; - } + if (!local->loc.inode) { + ret = syncop_fgetxattr(src_node, local->fd, &dict, + conf->link_xattr_name, NULL, NULL); + } else { + SYNCTASK_SETID(0, 0); + ret = syncop_getxattr(src_node, &local->loc, &dict, + conf->link_xattr_name, NULL, NULL); + SYNCTASK_SETID(frame->root->uid, frame->root->gid); + } + + /* + * Each DHT xlator layer has its own name for the linkto xattr. + * If the file mode bits indicate the the file is being migrated but + * this layer's linkto xattr is not set, it means that another + * DHT layer is migrating the file. In this case, return 1 so + * the mode bits can be passed on to the higher layer for appropriate + * action. 
+ */ + if (-ret == ENODATA) { + /* This DHT translator is not migrating this file */ + + ret = inode_ctx_reset1(inode, this, &tmp_miginfo); + if (tmp_miginfo) { + /* This can be a problem if the file was + * migrated by two different layers. Raise + * a warning here. + */ + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, + "%s: Found miginfo in the inode ctx", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid)); + + miginfo = (void *)tmp_miginfo; + GF_REF_PUT(miginfo); + } + ret = 1; + goto out; + } + + if (!ret) + linkto_target = dht_linkfile_subvol(this, NULL, NULL, dict); + + if (local->loc.inode) { + loc_copy(&tmp_loc, &local->loc); + } else { + tmp_loc.inode = inode_ref(inode); + gf_uuid_copy(tmp_loc.gfid, inode->gfid); + } + + ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED, + "%s: failed to lookup the file on %s", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), + this->name); + local->op_errno = -ret; + ret = -1; + goto out; + } + + dst_node = dht_subvol_get_cached(this, tmp_loc.inode); + if (linkto_target && dst_node != linkto_target) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE, + "linkto target (%s) is " + "different from cached-subvol (%s). Treating %s as " + "destination subvol", + linkto_target->name, dst_node->name, dst_node->name); + } + + if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, + "%s: gfid different on the target file on %s", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), + dst_node->name); + ret = -1; + local->op_errno = EIO; + goto out; + } - dst_node = dht_subvol_get_cached (this, tmp_loc.inode); - if (linkto_target && dst_node != linkto_target) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_INVALID_LINKFILE, - "linkto target (%s) is " - "different from cached-subvol (%s). Treating %s as " - "destination subvol", linkto_target->name, - dst_node->name, dst_node->name); - } + /* update local. A layout is set in inode-ctx in lookup already */ - if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_GFID_MISMATCH, - "%s: gfid different on the target file on %s", - tmp_loc.path ? tmp_loc.path : - uuid_utoa (tmp_loc.gfid), dst_node->name); - ret = -1; - local->op_errno = EIO; - goto out; - } + dht_layout_unref(this, local->layout); - /* update local. A layout is set in inode-ctx in lookup already */ + local->layout = dht_layout_get(frame->this, inode); + local->cached_subvol = dst_node; - dht_layout_unref (this, local->layout); + ret = 0; - local->layout = dht_layout_get (frame->this, inode); - local->cached_subvol = dst_node; + /* once we detect the migration complete, the inode-ctx2 is no more + required.. delete the ctx and also, it means, open() already + done on all the fd of inode */ + ret = inode_ctx_reset1(inode, this, &tmp_miginfo); + if (tmp_miginfo) { + miginfo = (void *)tmp_miginfo; + GF_REF_PUT(miginfo); + goto out; + } + + /* perform 'open()' on all the fd's present on the inode */ + if (tmp_loc.path == NULL) { + inode_path(inode, NULL, &path); + if (path) + tmp_loc.path = path; + } + + LOCK(&inode->lock); + + if (list_empty(&inode->fd_list)) + goto unlock; + + /* perform open as root:root. 
There is window between linkfile + * creation(root:root) and setattr with the correct uid/gid + */ + SYNCTASK_SETID(0, 0); + + /* It's possible that we are the last user of iter_fd after each + * iteration. In this case the fd_unref() of iter_fd at the end of + * the loop will cause the destruction of the fd. So we need to + * iterate the list safely because iter_fd cannot be trusted. + */ + list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list) + { + if (fd_is_anonymous(iter_fd)) + continue; + + if (dht_fd_open_on_dst(this, iter_fd, dst_node)) + continue; + + /* We need to release the inode->lock before calling + * syncop_open() to avoid possible deadlocks. However this + * can cause the iter_fd to be released by other threads. + * To avoid this, we take a reference before releasing the + * lock. + */ + __fd_ref(iter_fd); - ret = 0; + UNLOCK(&inode->lock); - /* once we detect the migration complete, the inode-ctx2 is no more - required.. delete the ctx and also, it means, open() already - done on all the fd of inode */ - ret = inode_ctx_reset1 (inode, this, &tmp_miginfo); - if (tmp_miginfo) { - miginfo = (void *)tmp_miginfo; - GF_REF_PUT (miginfo); - goto out; + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ + ret = syncop_open(dst_node, &tmp_loc, + (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)), + iter_fd, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_OPEN_FD_ON_DST_FAILED, + "failed" + " to open the fd" + " (%p, flags=0%o) on file %s @ %s", + iter_fd, iter_fd->flags, path, dst_node->name); + + open_failed = 1; + local->op_errno = -ret; + ret = -1; + } else { + dht_fd_ctx_set(this, iter_fd, dst_node); } - /* perform 'open()' on all the fd's present on the inode */ - if (tmp_loc.path == NULL) { - inode_path (inode, NULL, &path); - if (path) - tmp_loc.path = path; - } + fd_unref(iter_fd); LOCK(&inode->lock); + } - if (list_empty (&inode->fd_list)) - goto unlock; - - /* perform open as root:root. There is window between linkfile - * creation(root:root) and setattr with the correct uid/gid - */ - SYNCTASK_SETID(0, 0); - - /* It's possible that we are the last user of iter_fd after each - * iteration. In this case the fd_unref() of iter_fd at the end of - * the loop will cause the destruction of the fd. So we need to - * iterate the list safely because iter_fd cannot be trusted. - */ - list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) { - - if (fd_is_anonymous (iter_fd)) - continue; - - if (dht_fd_open_on_dst (this, iter_fd, dst_node)) - continue; - - /* We need to release the inode->lock before calling - * syncop_open() to avoid possible deadlocks. However this - * can cause the iter_fd to be released by other threads. - * To avoid this, we take a reference before releasing the - * lock. 
- */ - __fd_ref(iter_fd); - - UNLOCK(&inode->lock); - - /* flags for open are stripped down to allow following the - * new location of the file, otherwise we can get EEXIST or - * truncate the file again as rebalance is moving the data */ - ret = syncop_open (dst_node, &tmp_loc, - (iter_fd->flags & - ~(O_CREAT | O_EXCL | O_TRUNC)), - iter_fd, NULL, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_OPEN_FD_ON_DST_FAILED, "failed" - " to open the fd" - " (%p, flags=0%o) on file %s @ %s", - iter_fd, iter_fd->flags, path, - dst_node->name); - - open_failed = 1; - local->op_errno = -ret; - ret = -1; - } else { - dht_fd_ctx_set (this, iter_fd, dst_node); - } - - fd_unref(iter_fd); - - LOCK(&inode->lock); - } + SYNCTASK_SETID(frame->root->uid, frame->root->gid); - SYNCTASK_SETID (frame->root->uid, frame->root->gid); - - if (open_failed) { - ret = -1; - goto unlock; - } - ret = 0; + if (open_failed) { + ret = -1; + goto unlock; + } + ret = 0; unlock: - UNLOCK(&inode->lock); + UNLOCK(&inode->lock); out: - if (dict) { - dict_unref (dict); - } + if (dict) { + dict_unref(dict); + } - loc_wipe (&tmp_loc); + loc_wipe(&tmp_loc); - return ret; + return ret; } int -dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame) +dht_rebalance_complete_check(xlator_t *this, call_frame_t *frame) { - int ret = -1; - - ret = synctask_new (this->ctx->env, dht_migration_complete_check_task, - dht_migration_complete_check_done, - frame, frame); - return ret; + int ret = -1; + ret = synctask_new(this->ctx->env, dht_migration_complete_check_task, + dht_migration_complete_check_done, frame, frame); + return ret; } /* During 'in-progress' state, both nodes should have the file */ @@ -1550,790 +1477,777 @@ dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame) 1 : File is being migrated but not by this DHT layer. */ static int -dht_inprogress_check_done (int op_ret, call_frame_t *frame, void *data) +dht_inprogress_check_done(int op_ret, call_frame_t *frame, void *data) { - dht_local_t *local = NULL; - xlator_t *dst_subvol = NULL, *src_subvol = NULL; - inode_t *inode = NULL; + dht_local_t *local = NULL; + xlator_t *dst_subvol = NULL, *src_subvol = NULL; + inode_t *inode = NULL; - local = frame->local; + local = frame->local; - if (op_ret != 0) - goto out; + if (op_ret != 0) + goto out; - inode = local->loc.inode ? local->loc.inode : local->fd->inode; + inode = local->loc.inode ? 
local->loc.inode : local->fd->inode; - dht_inode_ctx_get_mig_info (THIS, inode, &src_subvol, &dst_subvol); - if (dht_mig_info_is_invalid (local->cached_subvol, - src_subvol, dst_subvol)) { - dst_subvol = dht_subvol_get_cached (THIS, inode); - if (!dst_subvol) { - local->op_errno = EINVAL; - goto out; - } + dht_inode_ctx_get_mig_info(THIS, inode, &src_subvol, &dst_subvol); + if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol, dst_subvol)) { + dst_subvol = dht_subvol_get_cached(THIS, inode); + if (!dst_subvol) { + local->op_errno = EINVAL; + goto out; } + } out: - local->rebalance.target_op_fn (THIS, dst_subvol, frame, op_ret); + local->rebalance.target_op_fn(THIS, dst_subvol, frame, op_ret); - return 0; + return 0; } static int -dht_rebalance_inprogress_task (void *data) +dht_rebalance_inprogress_task(void *data) { - int ret = -1; - xlator_t *src_node = NULL; - xlator_t *dst_node = NULL; - dht_local_t *local = NULL; - dict_t *dict = NULL; - call_frame_t *frame = NULL; - xlator_t *this = NULL; - char *path = NULL; - struct iatt stbuf = {0,}; - loc_t tmp_loc = {0,}; - dht_conf_t *conf = NULL; - inode_t *inode = NULL; - fd_t *iter_fd = NULL; - fd_t *tmp = NULL; - int open_failed = 0; - uint64_t tmp_miginfo = 0; - dht_migrate_info_t *miginfo = NULL; - - - this = THIS; - frame = data; - local = frame->local; - conf = this->private; - - src_node = local->cached_subvol; - - if (!local->loc.inode && !local->fd) - goto out; + int ret = -1; + xlator_t *src_node = NULL; + xlator_t *dst_node = NULL; + dht_local_t *local = NULL; + dict_t *dict = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + char *path = NULL; + struct iatt stbuf = { + 0, + }; + loc_t tmp_loc = { + 0, + }; + dht_conf_t *conf = NULL; + inode_t *inode = NULL; + fd_t *iter_fd = NULL; + fd_t *tmp = NULL; + int open_failed = 0; + uint64_t tmp_miginfo = 0; + dht_migrate_info_t *miginfo = NULL; + + this = THIS; + frame = data; + local = frame->local; + conf = this->private; + + src_node = local->cached_subvol; + + if (!local->loc.inode && !local->fd) + goto out; - inode = (!local->fd) ? local->loc.inode : local->fd->inode; + inode = (!local->fd) ? local->loc.inode : local->fd->inode; - /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr - * as root:root. If a fd is already open, access check won't be done*/ - if (local->loc.inode) { - SYNCTASK_SETID (0, 0); - ret = syncop_getxattr (src_node, &local->loc, &dict, - conf->link_xattr_name, NULL, NULL); - SYNCTASK_SETID (frame->root->uid, frame->root->gid); - } else { - ret = syncop_fgetxattr (src_node, local->fd, &dict, - conf->link_xattr_name, NULL, NULL); - } + /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr + * as root:root. If a fd is already open, access check won't be done*/ + if (local->loc.inode) { + SYNCTASK_SETID(0, 0); + ret = syncop_getxattr(src_node, &local->loc, &dict, + conf->link_xattr_name, NULL, NULL); + SYNCTASK_SETID(frame->root->uid, frame->root->gid); + } else { + ret = syncop_fgetxattr(src_node, local->fd, &dict, + conf->link_xattr_name, NULL, NULL); + } + + /* + * Each DHT xlator layer has its own name for the linkto xattr. + * If the file mode bits indicate the the file is being migrated but + * this layer's linkto xattr is not present, it means that another + * DHT layer is migrating the file. In this case, return 1 so + * the mode bits can be passed on to the higher layer for appropriate + * action. 
+ */ + + if (-ret == ENODATA) { + /* This DHT layer is not migrating this file */ + ret = inode_ctx_reset1(inode, this, &tmp_miginfo); + if (tmp_miginfo) { + /* This can be a problem if the file was + * migrated by two different layers. Raise + * a warning here. + */ + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, + "%s: Found miginfo in the inode ctx", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid)); + miginfo = (void *)tmp_miginfo; + GF_REF_PUT(miginfo); + } + ret = 1; + goto out; + } - /* - * Each DHT xlator layer has its own name for the linkto xattr. - * If the file mode bits indicate the the file is being migrated but - * this layer's linkto xattr is not present, it means that another - * DHT layer is migrating the file. In this case, return 1 so - * the mode bits can be passed on to the higher layer for appropriate - * action. + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED, + "%s: failed to get the 'linkto' xattr", local->loc.path); + ret = -1; + goto out; + } + + dst_node = dht_linkfile_subvol(this, NULL, NULL, dict); + if (!dst_node) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_NOT_FOUND, + "%s: failed to get the 'linkto' xattr from dict", + local->loc.path); + ret = -1; + goto out; + } + + local->rebalance.target_node = dst_node; + + if (local->loc.inode) { + loc_copy(&tmp_loc, &local->loc); + } else { + tmp_loc.inode = inode_ref(inode); + gf_uuid_copy(tmp_loc.gfid, inode->gfid); + } + + /* lookup on dst */ + ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_FILE_LOOKUP_ON_DST_FAILED, + "%s: failed to lookup the file on %s", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), + dst_node->name); + ret = -1; + goto out; + } + + if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, + "%s: gfid different on the target file on %s", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), + dst_node->name); + ret = -1; + goto out; + } + ret = 0; + + if (tmp_loc.path == NULL) { + inode_path(inode, NULL, &path); + if (path) + tmp_loc.path = path; + } + + LOCK(&inode->lock); + + if (list_empty(&inode->fd_list)) + goto unlock; + + /* perform open as root:root. There is window between linkfile + * creation(root:root) and setattr with the correct uid/gid + */ + SYNCTASK_SETID(0, 0); + + /* It's possible that we are the last user of iter_fd after each + * iteration. In this case the fd_unref() of iter_fd at the end of + * the loop will cause the destruction of the fd. So we need to + * iterate the list safely because iter_fd cannot be trusted. + */ + list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list) + { + if (fd_is_anonymous(iter_fd)) + continue; + + if (dht_fd_open_on_dst(this, iter_fd, dst_node)) + continue; + + /* We need to release the inode->lock before calling + * syncop_open() to avoid possible deadlocks. However this + * can cause the iter_fd to be released by other threads. + * To avoid this, we take a reference before releasing the + * lock. */ + __fd_ref(iter_fd); - if (-ret == ENODATA) { - /* This DHT layer is not migrating this file */ - ret = inode_ctx_reset1 (inode, this, &tmp_miginfo); - if (tmp_miginfo) { - /* This can be a problem if the file was - * migrated by two different layers. Raise - * a warning here. - */ - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_HAS_MIGINFO, - "%s: Found miginfo in the inode ctx", - tmp_loc.path ? 
tmp_loc.path : - uuid_utoa (tmp_loc.gfid)); - miginfo = (void *)tmp_miginfo; - GF_REF_PUT (miginfo); - } - ret = 1; - goto out; - } + UNLOCK(&inode->lock); + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ + ret = syncop_open(dst_node, &tmp_loc, + (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)), + iter_fd, NULL, NULL); if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_GET_XATTR_FAILED, - "%s: failed to get the 'linkto' xattr", - local->loc.path); - ret = -1; - goto out; - } - - dst_node = dht_linkfile_subvol (this, NULL, NULL, dict); - if (!dst_node) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SUBVOL_NOT_FOUND, - "%s: failed to get the 'linkto' xattr from dict", - local->loc.path); - ret = -1; - goto out; - } - - local->rebalance.target_node = dst_node; - - if (local->loc.inode) { - loc_copy (&tmp_loc, &local->loc); + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_OPEN_FD_ON_DST_FAILED, + "failed to send open " + "the fd (%p, flags=0%o) on file %s @ %s", + iter_fd, iter_fd->flags, path, dst_node->name); + ret = -1; + open_failed = 1; } else { - tmp_loc.inode = inode_ref (inode); - gf_uuid_copy (tmp_loc.gfid, inode->gfid); + /* Potential fd leak if this fails here as it will be + reopened at the next Phase1/2 check */ + dht_fd_ctx_set(this, iter_fd, dst_node); } - /* lookup on dst */ - ret = syncop_lookup (dst_node, &tmp_loc, &stbuf, NULL, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_FILE_LOOKUP_ON_DST_FAILED, - "%s: failed to lookup the file on %s", - tmp_loc.path ? tmp_loc.path : uuid_utoa (tmp_loc.gfid), - dst_node->name); - ret = -1; - goto out; - } - - if (gf_uuid_compare (stbuf.ia_gfid, tmp_loc.inode->gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_GFID_MISMATCH, - "%s: gfid different on the target file on %s", - tmp_loc.path ? tmp_loc.path : uuid_utoa (tmp_loc.gfid), - dst_node->name); - ret = -1; - goto out; - } - ret = 0; - - if (tmp_loc.path == NULL) { - inode_path (inode, NULL, &path); - if (path) - tmp_loc.path = path; - } + fd_unref(iter_fd); LOCK(&inode->lock); + } - if (list_empty (&inode->fd_list)) - goto unlock; - - /* perform open as root:root. There is window between linkfile - * creation(root:root) and setattr with the correct uid/gid - */ - SYNCTASK_SETID (0, 0); - - /* It's possible that we are the last user of iter_fd after each - * iteration. In this case the fd_unref() of iter_fd at the end of - * the loop will cause the destruction of the fd. So we need to - * iterate the list safely because iter_fd cannot be trusted. - */ - list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) { - if (fd_is_anonymous (iter_fd)) - continue; - - if (dht_fd_open_on_dst (this, iter_fd, dst_node)) - continue; - - /* We need to release the inode->lock before calling - * syncop_open() to avoid possible deadlocks. However this - * can cause the iter_fd to be released by other threads. - * To avoid this, we take a reference before releasing the - * lock. 
- */ - __fd_ref(iter_fd); - - UNLOCK(&inode->lock); - - /* flags for open are stripped down to allow following the - * new location of the file, otherwise we can get EEXIST or - * truncate the file again as rebalance is moving the data */ - ret = syncop_open (dst_node, &tmp_loc, - (iter_fd->flags & - ~(O_CREAT | O_EXCL | O_TRUNC)), - iter_fd, NULL, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_OPEN_FD_ON_DST_FAILED, - "failed to send open " - "the fd (%p, flags=0%o) on file %s @ %s", - iter_fd, iter_fd->flags, path, - dst_node->name); - ret = -1; - open_failed = 1; - } else { - /* Potential fd leak if this fails here as it will be - reopened at the next Phase1/2 check */ - dht_fd_ctx_set (this, iter_fd, dst_node); - } - - fd_unref(iter_fd); - - LOCK(&inode->lock); - } - - SYNCTASK_SETID (frame->root->uid, frame->root->gid); + SYNCTASK_SETID(frame->root->uid, frame->root->gid); unlock: - UNLOCK(&inode->lock); + UNLOCK(&inode->lock); - if (open_failed) { - ret = -1; - goto out; - } + if (open_failed) { + ret = -1; + goto out; + } - ret = dht_inode_ctx_set_mig_info (this, inode, src_node, dst_node); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SET_INODE_CTX_FAILED, - "%s: failed to set inode-ctx target file at %s", - local->loc.path, dst_node->name); - goto out; - } + ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "%s: failed to set inode-ctx target file at %s", local->loc.path, + dst_node->name); + goto out; + } - ret = 0; + ret = 0; out: - if (dict) { - dict_unref (dict); - } + if (dict) { + dict_unref(dict); + } - loc_wipe (&tmp_loc); - return ret; + loc_wipe(&tmp_loc); + return ret; } int -dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame) +dht_rebalance_in_progress_check(xlator_t *this, call_frame_t *frame) { + int ret = -1; - int ret = -1; - - ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task, - dht_inprogress_check_done, - frame, frame); - return ret; + ret = synctask_new(this->ctx->env, dht_rebalance_inprogress_task, + dht_inprogress_check_done, frame, frame); + return ret; } int -dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this, - dht_layout_t *layout_int) +dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this, + dht_layout_t *layout_int) { - dht_inode_ctx_t *ctx = NULL; - int ret = -1; - - ret = dht_inode_ctx_get (inode, this, &ctx); - if (!ret && ctx) { - ctx->layout = layout_int; - } else { - ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t); - if (!ctx) - return ret; - ctx->layout = layout_int; - } - - ret = dht_inode_ctx_set (inode, this, ctx); - - return ret; + dht_inode_ctx_t *ctx = NULL; + int ret = -1; + + ret = dht_inode_ctx_get(inode, this, &ctx); + if (!ret && ctx) { + ctx->layout = layout_int; + } else { + ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t); + if (!ctx) + return ret; + ctx->layout = layout_int; + } + + ret = dht_inode_ctx_set(inode, this, ctx); + + return ret; } - void -dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat) +dht_inode_ctx_time_set(inode_t *inode, xlator_t *this, struct iatt *stat) { - dht_inode_ctx_t *ctx = NULL; - dht_stat_time_t *time = 0; - int ret = -1; + dht_inode_ctx_t *ctx = NULL; + dht_stat_time_t *time = 0; + int ret = -1; - ret = dht_inode_ctx_get (inode, this, &ctx); + ret = dht_inode_ctx_get(inode, this, &ctx); - if (ret) - return; + if (ret) + return; - time = &ctx->time; + time = &ctx->time; - 
time->mtime = stat->ia_mtime; - time->mtime_nsec = stat->ia_mtime_nsec; + time->mtime = stat->ia_mtime; + time->mtime_nsec = stat->ia_mtime_nsec; - time->ctime = stat->ia_ctime; - time->ctime_nsec = stat->ia_ctime_nsec; + time->ctime = stat->ia_ctime; + time->ctime_nsec = stat->ia_ctime_nsec; - time->atime = stat->ia_atime; - time->atime_nsec = stat->ia_atime_nsec; + time->atime = stat->ia_atime; + time->atime_nsec = stat->ia_atime_nsec; - return; + return; } - int -dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, - int32_t post) +dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat, + int32_t post) { - dht_inode_ctx_t *ctx = NULL; - dht_stat_time_t *time = 0; - int ret = -1; + dht_inode_ctx_t *ctx = NULL; + dht_stat_time_t *time = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO (this->name, stat, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, stat, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - ret = dht_inode_ctx_get (inode, this, &ctx); + ret = dht_inode_ctx_get(inode, this, &ctx); - if (ret) { - ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t); - if (!ctx) - return -1; - } + if (ret) { + ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t); + if (!ctx) + return -1; + } - time = &ctx->time; + time = &ctx->time; - DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, - stat->ia_mtime, stat->ia_mtime_nsec, inode, post); - DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, - stat->ia_ctime, stat->ia_ctime_nsec, inode, post); - DHT_UPDATE_TIME(time->atime, time->atime_nsec, - stat->ia_atime, stat->ia_atime_nsec, inode, post); + DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime, + stat->ia_mtime_nsec, inode, post); + DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime, + stat->ia_ctime_nsec, inode, post); + DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime, + stat->ia_atime_nsec, inode, post); - ret = dht_inode_ctx_set (inode, this, ctx); + ret = dht_inode_ctx_set(inode, this, ctx); out: - return 0; + return 0; } int -dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx) +dht_inode_ctx_get(inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx) { - int ret = -1; - uint64_t ctx_int = 0; + int ret = -1; + uint64_t ctx_int = 0; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - ret = inode_ctx_get (inode, this, &ctx_int); + ret = inode_ctx_get(inode, this, &ctx_int); - if (ret) - return ret; + if (ret) + return ret; - if (ctx) - *ctx = (dht_inode_ctx_t *) ctx_int; + if (ctx) + *ctx = (dht_inode_ctx_t *)ctx_int; out: - return ret; + return ret; } -int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx) +int +dht_inode_ctx_set(inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx) { - int ret = -1; - uint64_t ctx_int = 0; + int ret = -1; + uint64_t ctx_int = 0; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - GF_VALIDATE_OR_GOTO (this->name, ctx, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, ctx, out); - ctx_int = (long)ctx; - ret = inode_ctx_set (inode, this, &ctx_int); + ctx_int = (long)ctx; + ret = inode_ctx_set(inode, this, &ctx_int); out: - return ret; + return ret; } int -dht_subvol_status (dht_conf_t *conf, xlator_t *subvol) +dht_subvol_status(dht_conf_t *conf, xlator_t 
*subvol) { - int i; + int i; - for (i=0 ; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == subvol) { - return conf->subvolume_status[i]; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == subvol) { + return conf->subvolume_status[i]; } - return 0; + } + return 0; } -inode_t* -dht_heal_path (xlator_t *this, char *path, inode_table_t *itable) +inode_t * +dht_heal_path(xlator_t *this, char *path, inode_table_t *itable) { - int ret = -1; - struct iatt iatt = {0, }; - inode_t *linked_inode = NULL; - loc_t loc = {0, }; - char *bname = NULL; - char *save_ptr = NULL; - uuid_t gfid = {0, }; - char *tmp_path = NULL; - - - tmp_path = gf_strdup (path); - if (!tmp_path) { - goto out; - } + int ret = -1; + struct iatt iatt = { + 0, + }; + inode_t *linked_inode = NULL; + loc_t loc = { + 0, + }; + char *bname = NULL; + char *save_ptr = NULL; + uuid_t gfid = { + 0, + }; + char *tmp_path = NULL; + + tmp_path = gf_strdup(path); + if (!tmp_path) { + goto out; + } - memset (gfid, 0, 16); - gfid[15] = 1; + memset(gfid, 0, 16); + gfid[15] = 1; - gf_uuid_copy (loc.pargfid, gfid); - loc.parent = inode_ref (itable->root); + gf_uuid_copy(loc.pargfid, gfid); + loc.parent = inode_ref(itable->root); - bname = strtok_r (tmp_path, "/", &save_ptr); + bname = strtok_r(tmp_path, "/", &save_ptr); - /* sending a lookup on parent directory, - * Eg: if path is like /a/b/c/d/e/f/g/ - * then we will send a lookup on a first and then b,c,d,etc - */ + /* sending a lookup on parent directory, + * Eg: if path is like /a/b/c/d/e/f/g/ + * then we will send a lookup on a first and then b,c,d,etc + */ - while (bname) { - linked_inode = NULL; - loc.inode = inode_grep (itable, loc.parent, bname); - if (loc.inode == NULL) { - loc.inode = inode_new (itable); - if (loc.inode == NULL) { - ret = -ENOMEM; - goto out; - } - } else { - /* - * Inode is already populated in the inode table. - * Which means we already looked up the inode and - * linked with a dentry. So that we will skip - * lookup on this entry, and proceed to next. - */ - linked_inode = loc.inode; - bname = strtok_r (NULL, "/", &save_ptr); - inode_unref (loc.parent); - if (!bname) { - goto out; - } - loc.parent = loc.inode; - gf_uuid_copy (loc.pargfid, loc.inode->gfid); - loc.inode = NULL; - continue; - } + while (bname) { + linked_inode = NULL; + loc.inode = inode_grep(itable, loc.parent, bname); + if (loc.inode == NULL) { + loc.inode = inode_new(itable); + if (loc.inode == NULL) { + ret = -ENOMEM; + goto out; + } + } else { + /* + * Inode is already populated in the inode table. + * Which means we already looked up the inode and + * linked with a dentry. So that we will skip + * lookup on this entry, and proceed to next. 
+ */ + linked_inode = loc.inode; + bname = strtok_r(NULL, "/", &save_ptr); + inode_unref(loc.parent); + if (!bname) { + goto out; + } + loc.parent = loc.inode; + gf_uuid_copy(loc.pargfid, loc.inode->gfid); + loc.inode = NULL; + continue; + } - loc.name = bname; - ret = loc_path (&loc, bname); + loc.name = bname; + ret = loc_path(&loc, bname); - ret = syncop_lookup (this, &loc, &iatt, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_DIR_SELFHEAL_FAILED, - "Healing of path %s failed on subvolume %s for " - "directory %s", path, this->name, bname); - goto out; - } + ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED, + "Healing of path %s failed on subvolume %s for " + "directory %s", + path, this->name, bname); + goto out; + } - linked_inode = inode_link (loc.inode, loc.parent, bname, &iatt); - if (!linked_inode) - goto out; + linked_inode = inode_link(loc.inode, loc.parent, bname, &iatt); + if (!linked_inode) + goto out; - loc_wipe (&loc); - gf_uuid_copy (loc.pargfid, linked_inode->gfid); - loc.inode = NULL; + loc_wipe(&loc); + gf_uuid_copy(loc.pargfid, linked_inode->gfid); + loc.inode = NULL; - bname = strtok_r (NULL, "/", &save_ptr); - if (bname) - loc.parent = linked_inode; - } + bname = strtok_r(NULL, "/", &save_ptr); + if (bname) + loc.parent = linked_inode; + } out: - inode_ref (linked_inode); - loc_wipe (&loc); - GF_FREE (tmp_path); + inode_ref(linked_inode); + loc_wipe(&loc); + GF_FREE(tmp_path); - return linked_inode; + return linked_inode; } - int -dht_heal_full_path (void *data) +dht_heal_full_path(void *data) { - call_frame_t *heal_frame = data; - dht_local_t *local = NULL; - loc_t loc = {0, }; - dict_t *dict = NULL; - char *path = NULL; - int ret = -1; - xlator_t *source = NULL; - xlator_t *this = NULL; - inode_table_t *itable = NULL; - inode_t *inode = NULL; - inode_t *tmp_inode = NULL; - - GF_VALIDATE_OR_GOTO ("DHT", heal_frame, out); - - local = heal_frame->local; - this = heal_frame->this; - source = heal_frame->cookie; - heal_frame->cookie = NULL; - gf_uuid_copy (loc.gfid, local->gfid); - - if (local->loc.inode) - loc.inode = inode_ref (local->loc.inode); - else - goto out; - - itable = loc.inode->table; - ret = syncop_getxattr (source, &loc, &dict, - GET_ANCESTRY_PATH_KEY, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_DIR_SELFHEAL_FAILED, - "Failed to get path from subvol %s. Aborting " - "directory healing.", source->name); - goto out; - } + call_frame_t *heal_frame = data; + dht_local_t *local = NULL; + loc_t loc = { + 0, + }; + dict_t *dict = NULL; + char *path = NULL; + int ret = -1; + xlator_t *source = NULL; + xlator_t *this = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + inode_t *tmp_inode = NULL; + + GF_VALIDATE_OR_GOTO("DHT", heal_frame, out); + + local = heal_frame->local; + this = heal_frame->this; + source = heal_frame->cookie; + heal_frame->cookie = NULL; + gf_uuid_copy(loc.gfid, local->gfid); + + if (local->loc.inode) + loc.inode = inode_ref(local->loc.inode); + else + goto out; - ret = dict_get_str (dict, GET_ANCESTRY_PATH_KEY, &path); - if (path) { - inode = dht_heal_path (this, path, itable); - if (inode && inode != local->inode) { - /* - * if inode returned by heal function is different - * from what we passed, which means a racing thread - * already linked a different inode for dentry. - * So we will update our local->inode, so that we can - * retrurn proper inode. 
- */ - tmp_inode = local->inode; - local->inode = inode; - inode_unref (tmp_inode); - tmp_inode = NULL; - } else { - inode_unref (inode); - } + itable = loc.inode->table; + ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED, + "Failed to get path from subvol %s. Aborting " + "directory healing.", + source->name); + goto out; + } + + ret = dict_get_str(dict, GET_ANCESTRY_PATH_KEY, &path); + if (path) { + inode = dht_heal_path(this, path, itable); + if (inode && inode != local->inode) { + /* + * if inode returned by heal function is different + * from what we passed, which means a racing thread + * already linked a different inode for dentry. + * So we will update our local->inode, so that we can + * retrurn proper inode. + */ + tmp_inode = local->inode; + local->inode = inode; + inode_unref(tmp_inode); + tmp_inode = NULL; + } else { + inode_unref(inode); } + } out: - loc_wipe (&loc); - if (dict) - dict_unref (dict); - return 0; + loc_wipe(&loc); + if (dict) + dict_unref(dict); + return 0; } int -dht_heal_full_path_done (int op_ret, call_frame_t *heal_frame, void *data) +dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data) { + call_frame_t *main_frame = NULL; + dht_local_t *local = NULL; + xlator_t *this = NULL; + int ret = -1; + + local = heal_frame->local; + main_frame = local->main_frame; + local->main_frame = NULL; + this = heal_frame->this; + + dht_set_fixed_dir_stat(&local->postparent); + if (local->need_xattr_heal) { + local->need_xattr_heal = 0; + ret = dht_dir_xattr_heal(this, local); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, + "xattr heal failed for directory %s ", local->loc.path); + } - call_frame_t *main_frame = NULL; - dht_local_t *local = NULL; - xlator_t *this = NULL; - int ret = -1; - - local = heal_frame->local; - main_frame = local->main_frame; - local->main_frame = NULL; - this = heal_frame->this; - - dht_set_fixed_dir_stat (&local->postparent); - if (local->need_xattr_heal) { - local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal (this, local); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "xattr heal failed for directory %s ", - local->loc.path); - } - - DHT_STACK_UNWIND (lookup, main_frame, 0, 0, - local->inode, &local->stbuf, local->xattr, - &local->postparent); + DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf, + local->xattr, &local->postparent); - DHT_STACK_DESTROY (heal_frame); - return 0; + DHT_STACK_DESTROY(heal_frame); + return 0; } /* This function must be called inside an inode lock */ int -__dht_lock_subvol_set (inode_t *inode, xlator_t *this, - xlator_t *lock_subvol) +__dht_lock_subvol_set(inode_t *inode, xlator_t *this, xlator_t *lock_subvol) { - dht_inode_ctx_t *ctx = NULL; - int ret = -1; - uint64_t value = 0; + dht_inode_ctx_t *ctx = NULL; + int ret = -1; + uint64_t value = 0; - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - ret = __inode_ctx_get0 (inode, this, &value); - if (ret || !value) { - return -1; - } + ret = __inode_ctx_get0(inode, this, &value); + if (ret || !value) { + return -1; + } - ctx = (dht_inode_ctx_t *) value; - ctx->lock_subvol = lock_subvol; + ctx = (dht_inode_ctx_t *)value; + ctx->lock_subvol = lock_subvol; out: - return ret; + return ret; } -xlator_t* -dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock, - dht_local_t *local) +xlator_t * 
+dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock, + dht_local_t *local) { - xlator_t *subvol = NULL; - inode_t *inode = NULL; - int32_t ret = -1; - uint64_t value = 0; - xlator_t *cached_subvol = NULL; - dht_inode_ctx_t *ctx = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - GF_VALIDATE_OR_GOTO (this->name, lock, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); + xlator_t *subvol = NULL; + inode_t *inode = NULL; + int32_t ret = -1; + uint64_t value = 0; + xlator_t *cached_subvol = NULL; + dht_inode_ctx_t *ctx = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - cached_subvol = local->cached_subvol; + GF_VALIDATE_OR_GOTO(this->name, lock, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); - if (local->loc.inode || local->fd) { - inode = local->loc.inode ? local->loc.inode : local->fd->inode; - } + cached_subvol = local->cached_subvol; - if (!inode) - goto out; + if (local->loc.inode || local->fd) { + inode = local->loc.inode ? local->loc.inode : local->fd->inode; + } - if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) { - /* - * We may get non-linked inode for directories as part - * of the selfheal code path. So checking for IA_INVAL - * type also. This will only happen for directory. - */ - subvol = local->cached_subvol; - goto out; - } + if (!inode) + goto out; - if (lock->l_type != F_UNLCK) { - /* - * inode purging might happen on NFS between a lk - * and unlk. Due to this lk and unlk might be sent - * to different subvols. - * So during a lock request, taking a ref on inode - * to prevent inode purging. inode unref will happen - * in unlock cbk code path. - */ - inode_ref (inode); - } + if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) { + /* + * We may get non-linked inode for directories as part + * of the selfheal code path. So checking for IA_INVAL + * type also. This will only happen for directory. + */ + subvol = local->cached_subvol; + goto out; + } - LOCK (&inode->lock); - ret = __inode_ctx_get0 (inode, this, &value); - if (!ret && value) { - ctx = (dht_inode_ctx_t *) value; - subvol = ctx->lock_subvol; - } - if (!subvol && lock->l_type != F_UNLCK && cached_subvol) { - ret = __dht_lock_subvol_set (inode, this, - cached_subvol); - if (ret) { - gf_uuid_unparse(inode->gfid, gfid); - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set lock_subvol in " - "inode ctx for gfid %s", - gfid); - goto unlock; - } - subvol = cached_subvol; - } + if (lock->l_type != F_UNLCK) { + /* + * inode purging might happen on NFS between a lk + * and unlk. Due to this lk and unlk might be sent + * to different subvols. + * So during a lock request, taking a ref on inode + * to prevent inode purging. inode unref will happen + * in unlock cbk code path. 
+ */ + inode_ref(inode); + } + + LOCK(&inode->lock); + ret = __inode_ctx_get0(inode, this, &value); + if (!ret && value) { + ctx = (dht_inode_ctx_t *)value; + subvol = ctx->lock_subvol; + } + if (!subvol && lock->l_type != F_UNLCK && cached_subvol) { + ret = __dht_lock_subvol_set(inode, this, cached_subvol); + if (ret) { + gf_uuid_unparse(inode->gfid, gfid); + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set lock_subvol in " + "inode ctx for gfid %s", + gfid); + goto unlock; + } + subvol = cached_subvol; + } unlock: - UNLOCK (&inode->lock); - if (!subvol && inode && lock->l_type != F_UNLCK) { - inode_unref (inode); - } + UNLOCK(&inode->lock); + if (!subvol && inode && lock->l_type != F_UNLCK) { + inode_unref(inode); + } out: - return subvol; + return subvol; } int -dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret) +dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret) { - int ret = -1; - dht_local_t *local = NULL; - inode_t *inode = NULL; - xlator_t *this = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; + int ret = -1; + dht_local_t *local = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - local = frame->local; - this = frame->this; + local = frame->local; + this = frame->this; - if (local->loc.inode || local->fd) { - inode = local->loc.inode ? local->loc.inode : local->fd->inode; - } - if (!inode) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LOCK_INODE_UNREF_FAILED, - "Found a NULL inode. Failed to unref the inode"); - goto out; - } + if (local->loc.inode || local->fd) { + inode = local->loc.inode ? local->loc.inode : local->fd->inode; + } + if (!inode) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED, + "Found a NULL inode. Failed to unref the inode"); + goto out; + } - if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) { - ret = 0; - goto out; - } + if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) { + ret = 0; + goto out; + } - switch (local->lock_type) { + switch (local->lock_type) { case F_RDLCK: case F_WRLCK: - if (op_ret) { - gf_uuid_unparse(inode->gfid, gfid); - gf_msg_debug (this->name, 0, - "lock request failed for gfid %s", gfid); - inode_unref (inode); - goto out; - } - break; + if (op_ret) { + gf_uuid_unparse(inode->gfid, gfid); + gf_msg_debug(this->name, 0, "lock request failed for gfid %s", + gfid); + inode_unref(inode); + goto out; + } + break; case F_UNLCK: - if (!op_ret) { - inode_unref (inode); - } else { - gf_uuid_unparse(inode->gfid, gfid); - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LOCK_INODE_UNREF_FAILED, - "Unlock request failed for gfid %s." - "Failed to unref the inode", gfid); - goto out; - } + if (!op_ret) { + inode_unref(inode); + } else { + gf_uuid_unparse(inode->gfid, gfid); + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_LOCK_INODE_UNREF_FAILED, + "Unlock request failed for gfid %s." 
+ "Failed to unref the inode", + gfid); + goto out; + } default: - break; - } - ret = 0; + break; + } + ret = 0; out: - return ret; + return ret; } /* Code to update custom extended attributes from src dict to dst dict -*/ + */ void -dht_dir_set_heal_xattr (xlator_t *this, dht_local_t *local, dict_t *dst, - dict_t *src, int *uret, int *uflag) +dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, + dict_t *src, int *uret, int *uflag) { - int ret = -1; - data_t *keyval = NULL; - int luret = -1; - int luflag = -1; - int i = 0; - - if (!src || !dst) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - DHT_MSG_DICT_SET_FAILED, - "src or dst is NULL. Failed to set " - " dictionary value for path %s", - local->loc.path); - return; - } - /* Check if any user xattr present in src dict and set - it to dst dict - */ - luret = dict_foreach_fnmatch (src, "user.*", - dht_set_user_xattr, dst); - /* Check if any other custom xattr present in src dict - and set it to dst dict, here index start from 1 because - user xattr already checked in previous statement - */ - for (i = 1; xattrs_to_heal[i]; i++) { - keyval = dict_get (src, xattrs_to_heal[i]); - if (keyval) { - luflag = 1; - ret = dict_set (dst, xattrs_to_heal[i], keyval); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", xattrs_to_heal[i], - local->loc.path); - keyval = NULL; - } - } - if (uret) - (*uret) = luret; - if (uflag) - (*uflag) = luflag; + int ret = -1; + data_t *keyval = NULL; + int luret = -1; + int luflag = -1; + int i = 0; + + if (!src || !dst) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED, + "src or dst is NULL. Failed to set " + " dictionary value for path %s", + local->loc.path); + return; + } + /* Check if any user xattr present in src dict and set + it to dst dict + */ + luret = dict_foreach_fnmatch(src, "user.*", dht_set_user_xattr, dst); + /* Check if any other custom xattr present in src dict + and set it to dst dict, here index start from 1 because + user xattr already checked in previous statement + */ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(src, xattrs_to_heal[i]); + if (keyval) { + luflag = 1; + ret = dict_set(dst, xattrs_to_heal[i], keyval); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + xattrs_to_heal[i], local->loc.path); + keyval = NULL; + } + } + if (uret) + (*uret) = luret; + if (uflag) + (*uflag) = luflag; } diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index 53215a3d34d..f2be5120e37 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -10,1544 +10,1475 @@ #include "dht-common.h" -int dht_access2 (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame, int ret); -int dht_readv2 (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame, int ret); -int dht_attr2 (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame, int ret); -int dht_open2 (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame, int ret); -int dht_flush2 (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame, int ret); -int dht_lk2 (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame, int ret); -int dht_fsync2 (xlator_t *this, xlator_t *dst_node, - call_frame_t *frame, int ret); int -dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, - int ret); 
+dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +int +dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +int +dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +int +dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +int +dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +int +dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +int +dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); +int +dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret); int -dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, fd_t *fd, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = 0; - - local = frame->local; - prev = cookie; - - local->op_errno = op_errno; - if ((op_ret == -1) && !dht_inode_missing(op_errno)) { - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto out; - } - - /* Update ctx if the fd has been opened on the target*/ - if (!op_ret && (local->call_cnt == 1)) { - dht_fd_ctx_set (this, fd, prev); - goto out; - } - - if (!op_ret || (local->call_cnt != 1)) - goto out; - - /* rebalance would have happened */ - local->rebalance.target_op_fn = dht_open2; - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = 0; + + local = frame->local; + prev = cookie; + + local->op_errno = op_errno; + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto out; + } + + /* Update ctx if the fd has been opened on the target*/ + if (!op_ret && (local->call_cnt == 1)) { + dht_fd_ctx_set(this, fd, prev); + goto out; + } + + if (!op_ret || (local->call_cnt != 1)) + goto out; + + /* rebalance would have happened */ + local->rebalance.target_op_fn = dht_open2; + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; out: - DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata); + DHT_STACK_UNWIND(open, frame, op_ret, op_errno, local->fd, xdata); - return 0; + return 0; } int -dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int op_errno = EINVAL; + dht_local_t *local = NULL; + int op_errno = EINVAL; - if (!frame || !frame->local) - goto out; + if (!frame || !frame->local) + goto out; - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This DHT layer is not migrating the file */ - DHT_STACK_UNWIND (open, frame, -1, local->op_errno, - NULL, local->rebalance.xdata); - return 0; + local = frame->local; + op_errno = local->op_errno; - } + if (we_are_not_migrating(ret)) { + /* This DHT layer is not migrating the file */ + DHT_STACK_UNWIND(open, frame, -1, local->op_errno, NULL, + local->rebalance.xdata); + return 0; + } - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; + local->call_cnt = 2; - STACK_WIND_COOKIE (frame, dht_open_cbk, subvol, subvol, - subvol->fops->open, &local->loc, - local->rebalance.flags, local->fd, local->xattr_req); - return 0; + STACK_WIND_COOKIE(frame, 
dht_open_cbk, subvol, subvol, subvol->fops->open, + &local->loc, local->rebalance.flags, local->fd, + local->xattr_req); + return 0; out: - DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); - return 0; + DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL); + return 0; } - int -dht_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags, fd_t *fd, dict_t *xdata) +dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, loc, fd, GF_FOP_OPEN); - if (!local) { - op_errno = ENOMEM; - goto err; - } + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - local->rebalance.flags = flags; - local->call_cnt = 1; + local = dht_local_init(frame, loc, fd, GF_FOP_OPEN); + if (!local) { + op_errno = ENOMEM; + goto err; + } - STACK_WIND_COOKIE (frame, dht_open_cbk, subvol, subvol, - subvol->fops->open, loc, flags, fd, xdata); + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); - return 0; + local->rebalance.flags = flags; + local->call_cnt = 1; + + STACK_WIND_COOKIE(frame, dht_open_cbk, subvol, subvol, subvol->fops->open, + loc, flags, fd, xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int -dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata) +dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *stbuf, dict_t *xdata) { - xlator_t *subvol1 = 0; - xlator_t *subvol2 = 0; - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; - inode_t *inode = NULL; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - - local = frame->local; - prev = cookie; - - if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) - && (op_errno == EBADF) && !(local->fd_checked)) { - - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } - - if ((op_ret == -1) && !dht_inode_missing(op_errno)) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto out; - } - - if (local->call_cnt != 1) - goto out; + xlator_t *subvol1 = 0; + xlator_t *subvol2 = 0; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; + inode_t *inode = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + + local = frame->local; + prev = cookie; + + if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) && + !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { local->op_errno = op_errno; - local->op_ret = op_ret; - - /* Check if the rebalance phase2 is true */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { - - local->rebalance.target_op_fn = dht_attr2; - dht_set_local_rebalance (this, local, NULL, NULL, - stbuf, xdata); - inode = (local->fd) ? local->fd->inode : local->loc.inode; - - dht_inode_ctx_get_mig_info (this, inode, &subvol1, &subvol2); - if (dht_mig_info_is_invalid (local->cached_subvol, - subvol1, subvol2)){ - /* Phase 2 of migration */ - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } else { - /* it is a non-fd op or it is an fd based Fop and - opened on the dst.*/ - if (local->fd && - !dht_fd_open_on_dst (this, local->fd, subvol2)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } else { - dht_attr2 (this, subvol2, frame, 0); - return 0; - } - } + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto out; + } + + if (local->call_cnt != 1) + goto out; + + local->op_errno = op_errno; + local->op_ret = op_ret; + + /* Check if the rebalance phase2 is true */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) { + local->rebalance.target_op_fn = dht_attr2; + dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata); + inode = (local->fd) ? 
local->fd->inode : local->loc.inode; + + dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2); + if (dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) { + /* Phase 2 of migration */ + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } else { + /* it is a non-fd op or it is an fd based Fop and + opened on the dst.*/ + if (local->fd && !dht_fd_open_on_dst(this, local->fd, subvol2)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } else { + dht_attr2(this, subvol2, frame, 0); + return 0; + } } + } out: - DHT_STRIP_PHASE1_FLAGS (stbuf); - DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata); + DHT_STRIP_PHASE1_FLAGS(stbuf); + DHT_STACK_UNWIND(stat, frame, op_ret, op_errno, stbuf, xdata); err: - return 0; + return 0; } int -dht_attr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int op_errno = EINVAL; - - local = frame->local; - if (!local) - goto out; - - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (stat, frame, local->op_ret, op_errno, - &local->rebalance.postbuf, - local->rebalance.xdata); - return 0; - } - + dht_local_t *local = NULL; + int op_errno = EINVAL; + + local = frame->local; + if (!local) + goto out; + + op_errno = local->op_errno; + + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(stat, frame, local->op_ret, op_errno, + &local->rebalance.postbuf, local->rebalance.xdata); + return 0; + } - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; + local->call_cnt = 2; - if (local->fop == GF_FOP_FSTAT) { - STACK_WIND_COOKIE (frame, dht_file_attr_cbk, subvol, subvol, - subvol->fops->fstat, local->fd, - local->xattr_req); - } else { - STACK_WIND_COOKIE (frame, dht_file_attr_cbk, subvol, subvol, - subvol->fops->stat, &local->loc, - local->xattr_req); - } + if (local->fop == GF_FOP_FSTAT) { + STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol, + subvol->fops->fstat, local->fd, local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol, + subvol->fops->stat, &local->loc, local->xattr_req); + } - return 0; + return 0; out: - DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); - return 0; + DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL); + return 0; } int -dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata) +dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *stbuf, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); - local = frame->local; - prev = cookie; + local = 
frame->local; + prev = cookie; - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); - goto unlock; - } + goto unlock; + } - dht_iatt_merge (this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->stbuf, stbuf); - local->op_ret = 0; - } + local->op_ret = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno, - &local->stbuf, xdata); - } + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + DHT_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, + &local->stbuf, xdata); + } err: - return 0; + return 0; } int -dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +dht_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int i = 0; - int call_cnt = 0; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - - local = dht_local_init (frame, loc, NULL, GF_FOP_STAT); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - layout = local->layout; - if (!layout) { - gf_msg_debug (this->name, 0, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); - - if (IA_ISREG (loc->inode->ia_type)) { - local->call_cnt = 1; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + + local = dht_local_init(frame, loc, NULL, GF_FOP_STAT); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); + + if (IA_ISREG(loc->inode->ia_type)) { + local->call_cnt = 1; - subvol = local->cached_subvol; + subvol = local->cached_subvol; - STACK_WIND_COOKIE (frame, dht_file_attr_cbk, subvol, subvol, - subvol->fops->stat, loc, xdata); + STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol, + subvol->fops->stat, loc, xdata); - return 0; - } + return 0; + } - local->call_cnt = call_cnt = layout->cnt; + local->call_cnt = call_cnt = layout->cnt; - for (i = 0; i < call_cnt; i++) { - subvol = layout->list[i].xlator; + for (i = 0; i < call_cnt; i++) { + subvol = layout->list[i].xlator; - STACK_WIND_COOKIE (frame, dht_attr_cbk, subvol, subvol, - subvol->fops->stat, loc, xdata); - } + STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol, + subvol->fops->stat, loc, xdata); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - int -dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +dht_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int i = 0; - int call_cnt = 0; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - layout = local->layout; - if (!layout) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, - "no layout for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); - - if (IA_ISREG (fd->inode->ia_type)) { - local->call_cnt = 1; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_FSTAT); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); + + if (IA_ISREG(fd->inode->ia_type)) { + local->call_cnt = 1; - subvol = local->cached_subvol; + subvol = local->cached_subvol; - STACK_WIND_COOKIE (frame, dht_file_attr_cbk, subvol, - subvol, subvol->fops->fstat, fd, - xdata); - return 0; - } + STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol, + subvol->fops->fstat, fd, xdata); + return 0; + } - local->call_cnt = call_cnt = layout->cnt; + local->call_cnt = call_cnt = layout->cnt; - for (i = 0; i < call_cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND_COOKIE (frame, dht_attr_cbk, subvol, subvol, - subvol->fops->fstat, fd, xdata); - } + for (i = 0; i < call_cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol, + subvol->fops->fstat, fd, xdata); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int -dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iovec *vector, int count, struct iatt *stbuf, - struct iobref *iobref, dict_t *xdata) +dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iovec *vector, int count, struct iatt *stbuf, + struct iobref *iobref, dict_t *xdata) { - dht_local_t *local = NULL; - int ret = 0; - xlator_t *src_subvol = 0; - xlator_t *dst_subvol = 0; - - local = frame->local; - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } + dht_local_t *local = NULL; + int ret = 0; + xlator_t *src_subvol = 0; + xlator_t *dst_subvol = 0; + + local = frame->local; + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + /* This is already second try, no need for re-check */ + if (local->call_cnt != 1) + goto out; + + if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - /* This is already second try, no need for re-check */ - if (local->call_cnt != 1) - goto out; + if ((op_ret == -1) && !dht_inode_missing(op_errno)) + goto out; - if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; + local->op_errno = op_errno; + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) { + local->op_ret = op_ret; + local->rebalance.target_op_fn = dht_readv2; + dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata); + /* File would be migrated to other node */ + ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol, + &dst_subvol); + + if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol, + dst_subvol) || + !dht_fd_open_on_dst(this, local->fd, dst_subvol)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) return 0; + } else { + /* value is already set in fd_ctx, that means no need + to check for whether its complete or not. */ + dht_readv2(this, dst_subvol, frame, 0); + return 0; } - - if ((op_ret == -1) && !dht_inode_missing(op_errno)) - goto out; - - - local->op_errno = op_errno; - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { - - local->op_ret = op_ret; - local->rebalance.target_op_fn = dht_readv2; - dht_set_local_rebalance (this, local, NULL, NULL, - stbuf, xdata); - /* File would be migrated to other node */ - ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, - &src_subvol, - &dst_subvol); - - if (dht_mig_info_is_invalid (local->cached_subvol, - src_subvol, dst_subvol) - || !dht_fd_open_on_dst(this, local->fd, dst_subvol)) { - - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } else { - /* value is already set in fd_ctx, that means no need - to check for whether its complete or not. 
*/ - dht_readv2 (this, dst_subvol, frame, 0); - return 0; - } - } + } out: - DHT_STRIP_PHASE1_FLAGS (stbuf); + DHT_STRIP_PHASE1_FLAGS(stbuf); - DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, - iobref, xdata); + DHT_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf, + iobref, xdata); - return 0; + return 0; } int -dht_readv2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int op_errno = EINVAL; - - local = frame->local; - if (!local) - goto out; - - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (readv, frame, local->op_ret, op_errno, - NULL, 0, &local->rebalance.postbuf, - NULL, local->rebalance.xdata); - return 0; - } + dht_local_t *local = NULL; + int op_errno = EINVAL; + + local = frame->local; + if (!local) + goto out; + + op_errno = local->op_errno; + + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(readv, frame, local->op_ret, op_errno, NULL, 0, + &local->rebalance.postbuf, NULL, + local->rebalance.xdata); + return 0; + } - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; + local->call_cnt = 2; - STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv, - local->fd, local->rebalance.size, local->rebalance.offset, - local->rebalance.flags, local->xattr_req); + STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd, + local->rebalance.size, local->rebalance.offset, + local->rebalance.flags, local->xattr_req); - return 0; + return 0; out: - DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); + return 0; } - int -dht_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata) +dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, + uint32_t flags, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_READ); - if (!local) { - op_errno = ENOMEM; - goto err; - } + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - if (xdata) - local->xattr_req = dict_ref (xdata); + local = dht_local_init(frame, NULL, fd, GF_FOP_READ); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->rebalance.offset = off; - local->rebalance.size = size; - local->rebalance.flags = flags; - local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv, - local->fd, local->rebalance.size, - local->rebalance.offset, - local->rebalance.flags, 
local->xattr_req); + if (xdata) + local->xattr_req = dict_ref(xdata); - return 0; + local->rebalance.offset = off; + local->rebalance.size = size; + local->rebalance.flags = flags; + local->call_cnt = 1; + + STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd, + local->rebalance.size, local->rebalance.offset, + local->rebalance.flags, local->xattr_req); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); - return 0; + return 0; } int -dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xdata) { - int ret = -1; - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - if (!prev) - goto out; - if (local->call_cnt != 1) - goto out; - if ((op_ret == -1) && ((op_errno == ENOTCONN) || - dht_inode_missing(op_errno)) && - IA_ISDIR(local->loc.inode->ia_type)) { - subvol = dht_subvol_next_available (this, prev); - if (!subvol) - goto out; - - /* check if we are done with visiting every node */ - if (subvol == local->cached_subvol) { - goto out; - } - - STACK_WIND_COOKIE (frame, dht_access_cbk, subvol, subvol, - subvol->fops->access, &local->loc, - local->rebalance.flags, NULL); - return 0; - } - if ((op_ret == -1) && dht_inode_missing(op_errno) && - !(IA_ISDIR(local->loc.inode->ia_type))) { - /* File would be migrated to other node */ - local->op_errno = op_errno; - local->rebalance.target_op_fn = dht_access2; - ret = dht_rebalance_complete_check (frame->this, frame); - if (!ret) - return 0; - } + int ret = -1; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + xlator_t *prev = NULL; + + local = frame->local; + prev = cookie; + + if (!prev) + goto out; + if (local->call_cnt != 1) + goto out; + if ((op_ret == -1) && + ((op_errno == ENOTCONN) || dht_inode_missing(op_errno)) && + IA_ISDIR(local->loc.inode->ia_type)) { + subvol = dht_subvol_next_available(this, prev); + if (!subvol) + goto out; + + /* check if we are done with visiting every node */ + if (subvol == local->cached_subvol) { + goto out; + } + + STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol, + subvol->fops->access, &local->loc, + local->rebalance.flags, NULL); + return 0; + } + if ((op_ret == -1) && dht_inode_missing(op_errno) && + !(IA_ISDIR(local->loc.inode->ia_type))) { + /* File would be migrated to other node */ + local->op_errno = op_errno; + local->rebalance.target_op_fn = dht_access2; + ret = dht_rebalance_complete_check(frame->this, frame); + if (!ret) + return 0; + } out: - DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata); - return 0; + DHT_STACK_UNWIND(access, frame, op_ret, op_errno, xdata); + return 0; } int -dht_access2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int op_errno = EINVAL; + dht_local_t *local = NULL; + int op_errno = EINVAL; - local = frame->local; - if (!local) - goto out; + local = frame->local; + if (!local) + goto out; - op_errno = local->op_errno; + op_errno = local->op_errno; - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. 
Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ - DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL); - return 0; - } + DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL); + return 0; + } - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; + local->call_cnt = 2; - STACK_WIND_COOKIE (frame, dht_access_cbk, subvol, subvol, - subvol->fops->access, &local->loc, - local->rebalance.flags, local->xattr_req); + STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol, + subvol->fops->access, &local->loc, local->rebalance.flags, + local->xattr_req); - return 0; + return 0; out: - DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL); - return 0; + DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL); + return 0; } - int -dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, - dict_t *xdata) +dht_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->rebalance.flags = mask; - local->call_cnt = 1; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); - - STACK_WIND_COOKIE (frame, dht_access_cbk, subvol, subvol, - subvol->fops->access, loc, mask, xdata); - - return 0; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + + local = dht_local_init(frame, loc, NULL, GF_FOP_ACCESS); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.flags = mask; + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); + + STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol, + subvol->fops->access, loc, mask, xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL); - return 0; + return 0; } - int -dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *subvol = 0; - int ret = 0; + dht_local_t *local = NULL; + xlator_t *subvol = 0; + int ret = 0; - local = frame->local; + local = frame->local; - local->op_errno = op_errno; + local->op_errno = op_errno; - if (local->call_cnt != 1) - goto out; + if (local->call_cnt != 1) + goto out; - if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } + if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - local->rebalance.target_op_fn = dht_flush2; + local->rebalance.target_op_fn = dht_flush2; - local->op_ret = op_ret; - local->op_errno = op_errno; + local->op_ret = op_ret; + local->op_errno = op_errno; - /* If context is set, then send flush() it to the destination */ - dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol); - if (subvol && dht_fd_open_on_dst (this, local->fd, subvol)) { - dht_flush2 (this, subvol, frame, 0); - return 0; - } + /* If context is set, then send flush() it to the destination */ + dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol); + if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) { + dht_flush2(this, subvol, frame, 0); + return 0; + } - if (op_errno == EREMOTE) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) { - return 0; - } + if (op_errno == EREMOTE) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) { + return 0; } + } out: - DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata); + DHT_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); - return 0; + return 0; } int -dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if ((frame == NULL) || (frame->local == NULL)) - goto out; + if ((frame == NULL) || (frame->local == NULL)) + goto out; - local = frame->local; + local = frame->local; - op_errno = local->op_errno; + op_errno = local->op_errno; - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; /* This is the second attempt */ + local->call_cnt = 2; /* This is the second attempt */ - STACK_WIND (frame, dht_flush_cbk, - subvol, subvol->fops->flush, local->fd, - local->xattr_req); + STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, local->fd, + local->xattr_req); - return 0; + return 0; out: - DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL); - return 0; + DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL); + return 0; } - int -dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +dht_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH); - if (!local) { - op_errno = ENOMEM; - goto err; - } + 
xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - if (xdata) - local->xattr_req = dict_ref (xdata); + local = dht_local_init(frame, NULL, fd, GF_FOP_FLUSH); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, dht_flush_cbk, - subvol, subvol->fops->flush, fd, local->xattr_req); - return 0; + if (xdata) + local->xattr_req = dict_ref(xdata); + + local->call_cnt = 1; + + STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd, + local->xattr_req); + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL); - return 0; + return 0; } - int -dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) +dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; - inode_t *inode = NULL; - xlator_t *src_subvol = 0; - xlator_t *dst_subvol = 0; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; + inode_t *inode = NULL; + xlator_t *src_subvol = 0; + xlator_t *dst_subvol = 0; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; - local->op_errno = op_errno; + local->op_errno = op_errno; - if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } + if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - if (op_ret == -1 && !dht_inode_missing(op_errno)) { - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto out; - } + if (op_ret == -1 && !dht_inode_missing(op_errno)) { + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto out; + } - if (local->call_cnt != 1) { - if (local->stbuf.ia_blocks) { - dht_iatt_merge (this, postbuf, &local->stbuf); - dht_iatt_merge (this, prebuf, &local->prebuf); - } - goto out; + if (local->call_cnt != 1) { + if (local->stbuf.ia_blocks) { + dht_iatt_merge(this, postbuf, &local->stbuf); + dht_iatt_merge(this, prebuf, &local->prebuf); } + goto out; + } - local->op_ret = op_ret; - inode = local->fd->inode; + local->op_ret = op_ret; + inode = local->fd->inode; - local->rebalance.target_op_fn = dht_fsync2; - dht_set_local_rebalance (this, local, NULL, prebuf, - postbuf, xdata); + local->rebalance.target_op_fn = dht_fsync2; + dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata); - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } - - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { - - dht_iatt_merge (this, &local->stbuf, postbuf); - 
dht_iatt_merge (this, &local->prebuf, prebuf); + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } - dht_inode_ctx_get_mig_info (this, inode, &src_subvol, &dst_subvol); + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(postbuf)) { + dht_iatt_merge(this, &local->stbuf, postbuf); + dht_iatt_merge(this, &local->prebuf, prebuf); - if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol, - dst_subvol) || - !dht_fd_open_on_dst (this, local->fd, dst_subvol)) { + dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol); - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; - } else { - dht_fsync2 (this, dst_subvol, frame, 0); - return 0; - } + if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol, + dst_subvol) || + !dht_fd_open_on_dst(this, local->fd, dst_subvol)) { + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } else { + dht_fsync2(this, dst_subvol, frame, 0); + return 0; } - + } out: - DHT_STRIP_PHASE1_FLAGS (postbuf); - DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STRIP_PHASE1_FLAGS(postbuf); + DHT_STRIP_PHASE1_FLAGS(prebuf); - DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + DHT_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); - return 0; + return 0; } int -dht_fsync2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; - - if ((frame == NULL) || (frame->local == NULL)) - goto out; - - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (fsync, frame, local->op_ret, - op_errno, &local->rebalance.prebuf, - &local->rebalance.postbuf, - local->rebalance.xdata); - return 0; - } + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; + + if ((frame == NULL) || (frame->local == NULL)) + goto out; + + local = frame->local; + op_errno = local->op_errno; + + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND(fsync, frame, local->op_ret, op_errno, + &local->rebalance.prebuf, &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; /* This is the second attempt */ + local->call_cnt = 2; /* This is the second attempt */ - STACK_WIND_COOKIE (frame, dht_fsync_cbk, subvol, subvol, - subvol->fops->fsync, local->fd, - local->rebalance.flags, local->xattr_req); + STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync, + local->fd, local->rebalance.flags, local->xattr_req); - return 0; + return 0; out: - DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int -dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, - dict_t *xdata) +dht_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC); - if (!local) { - op_errno = ENOMEM; + local = dht_local_init(frame, NULL, fd, GF_FOP_FSYNC); + if (!local) { + op_errno = ENOMEM; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); - local->call_cnt = 1; - local->rebalance.flags = datasync; + local->call_cnt = 1; + local->rebalance.flags = datasync; - subvol = local->cached_subvol; + subvol = local->cached_subvol; - STACK_WIND_COOKIE (frame, dht_fsync_cbk, subvol, subvol, - subvol->fops->fsync, local->fd, - local->rebalance.flags, local->xattr_req); - return 0; + STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync, + local->fd, local->rebalance.flags, local->xattr_req); + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - /* TODO: for 'lk()' call, we need some other special error, may be ESTALE to indicate that lock migration happened on the fd, so we can consider it as phase 2 of migration */ int -dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata) +dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct gf_flock *flock, dict_t *xdata) { - dht_local_t *local = NULL; - int ret = -1; - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + int ret = -1; + xlator_t *subvol = NULL; - local = frame->local; + local = frame->local; - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } - if (local->call_cnt != 1) - goto out; + if (local->call_cnt != 1) + goto out; - local->rebalance.target_op_fn = dht_lk2; + local->rebalance.target_op_fn = dht_lk2; - local->op_ret = op_ret; - local->op_errno = op_errno; + local->op_ret = op_ret; + local->op_errno = op_errno; + + if (xdata) + local->rebalance.xdata = dict_ref(xdata); - if (xdata) - local->rebalance.xdata = dict_ref (xdata); - - if (op_errno == EREMOTE) { - dht_inode_ctx_get_mig_info (this, local->fd->inode, - NULL, &subvol); - if (subvol && dht_fd_open_on_dst (this, local->fd, subvol)) { - dht_lk2 (this, subvol, frame, 0); - return 0; - } else { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) { - return 0; - } - } + if (op_errno == EREMOTE) { + dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol); + if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) { + dht_lk2(this, subvol, frame, 0); + return 0; + } else { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) { + return 0; + } } + } out: - dht_lk_inode_unref (frame, op_ret); - DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata); + dht_lk_inode_unref(frame, op_ret); + DHT_STACK_UNWIND(lk, frame, op_ret, op_errno, flock, xdata); - return 0; + return 0; } int -dht_lk2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if ((frame == NULL) || (frame->local == NULL)) - goto out; + if ((frame == NULL) || (frame->local == NULL)) + goto out; - local = frame->local; + local = frame->local; - op_errno = local->op_errno; + op_errno = local->op_errno; - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; /* This is the second attempt */ + local->call_cnt = 2; /* This is the second attempt */ - STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd, - local->rebalance.lock_cmd, &local->rebalance.flock, - local->xattr_req); + STACK_WIND(frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd, + local->rebalance.lock_cmd, &local->rebalance.flock, + local->xattr_req); - return 0; + return 0; out: - DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL); - return 0; + DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); + return 0; } int -dht_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) +dht_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) { - xlator_t *lock_subvol = NULL; - int op_errno = -1; - dht_local_t 
*local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_LK); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->lock_type = flock->l_type; - lock_subvol = dht_get_lock_subvolume (this, flock, local); - if (!lock_subvol) { - gf_msg_debug (this->name, 0, - "no lock subvolume for path=%p", fd); - op_errno = EINVAL; - goto err; - } - -/* - local->cached_subvol = lock_subvol; - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto err; -*/ - if (xdata) - local->xattr_req = dict_ref (xdata); - - local->rebalance.flock = *flock; - local->rebalance.lock_cmd = cmd; - - local->call_cnt = 1; - - STACK_WIND (frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd, - cmd, flock, xdata); - - return 0; + xlator_t *lock_subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_LK); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->lock_type = flock->l_type; + lock_subvol = dht_get_lock_subvolume(this, flock, local); + if (!lock_subvol) { + gf_msg_debug(this->name, 0, "no lock subvolume for path=%p", fd); + op_errno = EINVAL; + goto err; + } + + /* + local->cached_subvol = lock_subvol; + ret = dht_check_and_open_fd_on_subvol (this, frame); + if (ret) + goto err; + */ + if (xdata) + local->xattr_req = dict_ref(xdata); + + local->rebalance.flock = *flock; + local->rebalance.lock_cmd = cmd; + + local->call_cnt = 1; + + STACK_WIND(frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd, cmd, + flock, xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int -dht_lease_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct gf_lease *lease, dict_t *xdata) +dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct gf_lease *lease, dict_t *xdata) { - DHT_STACK_UNWIND (lease, frame, op_ret, op_errno, lease, xdata); + DHT_STACK_UNWIND(lease, frame, op_ret, op_errno, lease, xdata); - return 0; + return 0; } int -dht_lease (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct gf_lease *lease, dict_t *xdata) +dht_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct gf_lease *lease, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + xlator_t *subvol = NULL; + int op_errno = -1; - /* TODO: for rebalance, we need to preserve the fop arguments */ - STACK_WIND (frame, dht_lease_cbk, subvol, subvol->fops->lease, - loc, lease, xdata); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); - return 0; + subvol = dht_subvol_get_cached(this, loc->inode); + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + /* TODO: for rebalance, we need to preserve the fop arguments */ + STACK_WIND(frame, dht_lease_cbk, subvol, subvol->fops->lease, loc, lease, + xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lease, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } /* Symlinks are currently not migrated, so no need for any check here */ int -dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, const char *path, - struct iatt *stbuf, dict_t *xdata) +dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, const char *path, struct iatt *stbuf, + dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; - if (op_ret == -1) - goto err; + local = frame->local; + if (op_ret == -1) + goto err; - if (!local) { - op_ret = -1; - op_errno = EINVAL; - } + if (!local) { + op_ret = -1; + op_errno = EINVAL; + } err: - DHT_STRIP_PHASE1_FLAGS (stbuf); - DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata); + DHT_STRIP_PHASE1_FLAGS(stbuf); + DHT_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, stbuf, xdata); - return 0; + return 0; } - int -dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, - dict_t *xdata) +dht_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - STACK_WIND (frame, dht_readlink_cbk, - subvol, subvol->fops->readlink, - loc, size, xdata); - - return 0; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + + local = dht_local_init(frame, loc, NULL, GF_FOP_READLINK); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND(frame, dht_readlink_cbk, subvol, subvol->fops->readlink, loc, + size, xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(readlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - int -dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) +dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - dht_local_t *local = NULL; - call_frame_t *call_frame = NULL; - xlator_t *prev = NULL; - xlator_t *src_subvol = NULL; - xlator_t *dst_subvol = NULL; - struct iatt stbuf = {0,}; - int ret = -1; - inode_t *inode = NULL; - - local = frame->local; - call_frame = cookie; - prev = call_frame->this; - - local->op_errno = op_errno; - - if ((op_ret == -1) && !dht_inode_missing (op_errno)) { - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1.", - prev->name); - goto out; - } - - if (local->call_cnt != 1) - goto out; - - ret = dht_read_iatt_from_xdata (this, xdata, &stbuf); - - if ((!op_ret) && (ret)) { - /* This is a potential problem and can cause corruption - * with sharding. - * Oh well. We tried. - */ - goto out; - } - - local->op_ret = op_ret; - local->rebalance.target_op_fn = dht_common_xattrop2; - if (xdata) - local->rebalance.xdata = dict_ref (xdata); - - if (dict) - local->rebalance.dict = dict_ref (dict); - - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (&stbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } - - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (&stbuf)) { - - inode = local->loc.inode ? local->loc.inode : local->fd->inode; - dht_inode_ctx_get_mig_info (this, inode, &src_subvol, - &dst_subvol); - - if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol, - dst_subvol) || - !dht_fd_open_on_dst (this, local->fd, dst_subvol)) { - - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; - } else { - dht_common_xattrop2 (this, dst_subvol, frame, 0); - return 0; - } + dht_local_t *local = NULL; + call_frame_t *call_frame = NULL; + xlator_t *prev = NULL; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; + struct iatt stbuf = { + 0, + }; + int ret = -1; + inode_t *inode = NULL; + + local = frame->local; + call_frame = cookie; + prev = call_frame->this; + + local->op_errno = op_errno; + + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.", + prev->name); + goto out; + } + + if (local->call_cnt != 1) + goto out; + + ret = dht_read_iatt_from_xdata(this, xdata, &stbuf); + + if ((!op_ret) && (ret)) { + /* This is a potential problem and can cause corruption + * with sharding. + * Oh well. We tried. + */ + goto out; + } + + local->op_ret = op_ret; + local->rebalance.target_op_fn = dht_common_xattrop2; + if (xdata) + local->rebalance.xdata = dict_ref(xdata); + + if (dict) + local->rebalance.dict = dict_ref(dict); + + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(&stbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(&stbuf)) { + inode = local->loc.inode ? 
local->loc.inode : local->fd->inode; + dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol); + + if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol, + dst_subvol) || + !dht_fd_open_on_dst(this, local->fd, dst_subvol)) { + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } else { + dht_common_xattrop2(this, dst_subvol, frame, 0); + return 0; } - + } out: - if (local->fop == GF_FOP_XATTROP) { - DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, - dict, xdata); - } else { - DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, - dict, xdata); - } + if (local->fop == GF_FOP_XATTROP) { + DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata); + } else { + DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata); + } - return 0; + return 0; } - int -dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, - int ret) +dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, + int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; - - if ((frame == NULL) || (frame->local == NULL)) - goto out; - - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - if (local->fop == GF_FOP_XATTROP) { - DHT_STACK_UNWIND (xattrop, frame, local->op_ret, - op_errno, local->rebalance.dict, - local->rebalance.xdata); - } else { - DHT_STACK_UNWIND (fxattrop, frame, local->op_ret, - op_errno, local->rebalance.dict, - local->rebalance.xdata); - } + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - return 0; - } + if ((frame == NULL) || (frame->local == NULL)) + goto out; - if (subvol == NULL) - goto out; - - local->call_cnt = 2; /* This is the second attempt */ + local = frame->local; + op_errno = local->op_errno; + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ if (local->fop == GF_FOP_XATTROP) { - STACK_WIND (frame, dht_common_xattrop_cbk, subvol, - subvol->fops->xattrop, &local->loc, - local->rebalance.flags, local->rebalance.xattr, - local->xattr_req); + DHT_STACK_UNWIND(xattrop, frame, local->op_ret, op_errno, + local->rebalance.dict, local->rebalance.xdata); } else { - STACK_WIND (frame, dht_common_xattrop_cbk, subvol, - subvol->fops->fxattrop, local->fd, - local->rebalance.flags, local->rebalance.xattr, - local->xattr_req); + DHT_STACK_UNWIND(fxattrop, frame, local->op_ret, op_errno, + local->rebalance.dict, local->rebalance.xdata); } return 0; + } + + if (subvol == NULL) + goto out; + + local->call_cnt = 2; /* This is the second attempt */ + + if (local->fop == GF_FOP_XATTROP) { + STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop, + &local->loc, local->rebalance.flags, local->rebalance.xattr, + local->xattr_req); + } else { + STACK_WIND(frame, dht_common_xattrop_cbk, subvol, + subvol->fops->fxattrop, local->fd, local->rebalance.flags, + local->rebalance.xattr, local->xattr_req); + } + + return 0; out: - /* If local is unavailable we could be unwinding the wrong - * function here */ + /* If local is unavailable we could be unwinding the wrong + * function here */ - if (local && (local->fop == GF_FOP_XATTROP)) { - DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL); - } else { - DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL); - } - return 0; + if (local && (local->fop == GF_FOP_XATTROP)) { + DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL); + } else { + DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL); + } + return 0; } - int -dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata); - return 0; + DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata); + return 0; } - int -dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - int ret = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - - local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for gfid=%s", - uuid_utoa (loc->inode->gfid)); - op_errno = EINVAL; - goto err; - } - - /* Todo : Handle dirs as well. At the moment the only xlator above dht - * that uses xattrop is sharding and that is only for files */ - - if (IA_ISDIR (loc->inode->ia_type)) { - STACK_WIND (frame, dht_xattrop_cbk, subvol, - subvol->fops->xattrop, loc, flags, dict, xdata); - - } else { - local->xattr_req = xdata ? 
dict_ref(xdata) : dict_new (); - local->call_cnt = 1; - - local->rebalance.xattr = dict_ref (dict); - local->rebalance.flags = flags; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + int ret = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + + local = dht_local_init(frame, loc, NULL, GF_FOP_XATTROP); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s", + uuid_utoa(loc->inode->gfid)); + op_errno = EINVAL; + goto err; + } + + /* Todo : Handle dirs as well. At the moment the only xlator above dht + * that uses xattrop is sharding and that is only for files */ + + if (IA_ISDIR(loc->inode->ia_type)) { + STACK_WIND(frame, dht_xattrop_cbk, subvol, subvol->fops->xattrop, loc, + flags, dict, xdata); + + } else { + local->xattr_req = xdata ? dict_ref(xdata) : dict_new(); + local->call_cnt = 1; - ret = dht_request_iatt_in_xdata (this, local->xattr_req); + local->rebalance.xattr = dict_ref(dict); + local->rebalance.flags = flags; - if (ret) { - gf_msg_debug (this->name, 0, - "Failed to set dictionary key %s file=%s", - DHT_IATT_IN_XDATA_KEY, loc->path); - } + ret = dht_request_iatt_in_xdata(this, local->xattr_req); - STACK_WIND (frame, dht_common_xattrop_cbk, subvol, - subvol->fops->xattrop, loc, - local->rebalance.flags, local->rebalance.xattr, - local->xattr_req); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set dictionary key %s file=%s", + DHT_IATT_IN_XDATA_KEY, loc->path); } - return 0; + STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop, + loc, local->rebalance.flags, local->rebalance.xattr, + local->xattr_req); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } - int -dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata); - return 0; + DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata); + return 0; } - int -dht_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - int ret = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame, NULL, fd, GF_FOP_FXATTROP); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - /* Todo : Handle dirs as well. 
At the moment the only xlator above dht - * that uses xattrop is sharding and that is only for files */ - - if (IA_ISDIR (fd->inode->ia_type)) { - STACK_WIND (frame, dht_fxattrop_cbk, subvol, - subvol->fops->fxattrop, fd, flags, dict, xdata); - - } else { - local->xattr_req = xdata ? dict_ref(xdata) : dict_new (); - local->call_cnt = 1; - - local->rebalance.xattr = dict_ref (dict); - local->rebalance.flags = flags; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + int ret = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + subvol = dht_subvol_get_cached(this, fd->inode); + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init(frame, NULL, fd, GF_FOP_FXATTROP); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + /* Todo : Handle dirs as well. At the moment the only xlator above dht + * that uses xattrop is sharding and that is only for files */ + + if (IA_ISDIR(fd->inode->ia_type)) { + STACK_WIND(frame, dht_fxattrop_cbk, subvol, subvol->fops->fxattrop, fd, + flags, dict, xdata); + + } else { + local->xattr_req = xdata ? dict_ref(xdata) : dict_new(); + local->call_cnt = 1; - ret = dht_request_iatt_in_xdata (this, local->xattr_req); + local->rebalance.xattr = dict_ref(dict); + local->rebalance.flags = flags; - if (ret) { - gf_msg_debug (this->name, 0, - "Failed to set dictionary key %s fd=%p", - DHT_IATT_IN_XDATA_KEY, fd); - } + ret = dht_request_iatt_in_xdata(this, local->xattr_req); - STACK_WIND (frame, dht_common_xattrop_cbk, subvol, - subvol->fops->fxattrop, fd, - local->rebalance.flags, local->rebalance.xattr, - local->xattr_req); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to set dictionary key %s fd=%p", + DHT_IATT_IN_XDATA_KEY, fd); } - return 0; + STACK_WIND(frame, dht_common_xattrop_cbk, subvol, + subvol->fops->fxattrop, fd, local->rebalance.flags, + local->rebalance.xattr, local->xattr_req); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } /* Currently no translators on top of 'distribute' will be using @@ -1555,116 +1486,108 @@ err: */ int -dht_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_lk_inode_unref (frame, op_ret); - DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata); - return 0; + dht_lk_inode_unref(frame, op_ret); + DHT_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata); + return 0; } - int32_t -dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +dht_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + int32_t cmd, struct gf_flock *lock, dict_t *xdata) { - xlator_t *lock_subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; + xlator_t *lock_subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - - local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = dht_local_init(frame, loc, NULL, GF_FOP_INODELK); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->lock_type = lock->l_type; - lock_subvol = dht_get_lock_subvolume (this, lock, local); - if (!lock_subvol) { - gf_msg_debug (this->name, 0, - "no lock subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } + local->lock_type = lock->l_type; + lock_subvol = dht_get_lock_subvolume(this, lock, local); + if (!lock_subvol) { + gf_msg_debug(this->name, 0, "no lock subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } - local->call_cnt = 1; + local->call_cnt = 1; - STACK_WIND (frame, dht_inodelk_cbk, - lock_subvol, lock_subvol->fops->inodelk, - volume, loc, cmd, lock, xdata); + STACK_WIND(frame, dht_inodelk_cbk, lock_subvol, lock_subvol->fops->inodelk, + volume, loc, cmd, lock, xdata); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(inodelk, frame, -1, op_errno, NULL); - return 0; + return 0; } int -dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_lk_inode_unref (frame, op_ret); - DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata); - return 0; + dht_lk_inode_unref(frame, op_ret); + DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata); + return 0; } - int -dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int32_t cmd, struct gf_flock *lock, dict_t *xdata) { - xlator_t *lock_subvol = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_INODELK); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->call_cnt = 1; - local->lock_type = lock->l_type; - - lock_subvol = dht_get_lock_subvolume (this, lock, local); - if (!lock_subvol) { - gf_msg_debug (this->name, 0, - "no lock subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - -/* - local->cached_subvol = lock_subvol; - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto err; -*/ - STACK_WIND (frame, dht_finodelk_cbk, lock_subvol, - lock_subvol->fops->finodelk, - volume, fd, cmd, lock, xdata); - - return 0; + xlator_t *lock_subvol = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_INODELK); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->call_cnt = 1; + local->lock_type = lock->l_type; + + lock_subvol = dht_get_lock_subvolume(this, lock, local); + if (!lock_subvol) { + gf_msg_debug(this->name, 0, "no lock subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + /* + local->cached_subvol = lock_subvol; + ret = dht_check_and_open_fd_on_subvol (this, frame); + if (ret) + goto err; + */ + STACK_WIND(frame, dht_finodelk_cbk, lock_subvol, + lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL); - return 0; + return 0; } diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 4b4f2c2d4a3..d0d12fd7658 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -8,1487 +8,1401 @@ cases as published by the Free Software Foundation. 
*/ - #include "dht-common.h" -int dht_writev2 (xlator_t *this, xlator_t *subvol, - call_frame_t *frame, int ret); -int dht_truncate2 (xlator_t *this, xlator_t *subvol, - call_frame_t *frame, int ret); -int dht_setattr2 (xlator_t *this, xlator_t *subvol, - call_frame_t *frame, int ret); -int dht_fallocate2 (xlator_t *this, xlator_t *subvol, - call_frame_t *frame, int ret); -int dht_discard2 (xlator_t *this, xlator_t *subvol, - call_frame_t *frame, int ret); -int dht_zerofill2 (xlator_t *this, xlator_t *subvol, - call_frame_t *frame, int ret); +int +dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); +int +dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); +int +dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); +int +dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); +int +dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); +int +dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); int -dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; - xlator_t *subvol1 = NULL; - xlator_t *subvol2 = NULL; - - local = frame->local; - prev = cookie; - - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - /* writev fails with EBADF if dht has not yet opened the fd - * on the cached subvol. This could happen if the file was migrated - * and a lookup updated the cached subvol in the inode ctx. - * We only check once as this could be a valid bad fd error. - */ - - if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; + xlator_t *subvol1 = NULL; + xlator_t *subvol2 = NULL; + + local = frame->local; + prev = cookie; + + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + /* writev fails with EBADF if dht has not yet opened the fd + * on the cached subvol. This could happen if the file was migrated + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could be a valid bad fd error. 
+ */ + + if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - if (op_ret == -1 && !dht_inode_missing(op_errno)) { - local->op_errno = op_errno; - local->op_ret = -1; - gf_msg_debug (this->name, 0, - "subvolume %s returned -1 (%s)", - prev->name, strerror (op_errno)); - goto out; + if (op_ret == -1 && !dht_inode_missing(op_errno)) { + local->op_errno = op_errno; + local->op_ret = -1; + gf_msg_debug(this->name, 0, "subvolume %s returned -1 (%s)", prev->name, + strerror(op_errno)); + goto out; + } + + if (local->call_cnt != 1) { + /* preserve the modes of source */ + if (local->stbuf.ia_blocks) { + dht_iatt_merge(this, postbuf, &local->stbuf); + dht_iatt_merge(this, prebuf, &local->prebuf); } - - if (local->call_cnt != 1) { - /* preserve the modes of source */ - if (local->stbuf.ia_blocks) { - dht_iatt_merge (this, postbuf, &local->stbuf); - dht_iatt_merge (this, prebuf, &local->prebuf); + goto out; + } + + local->rebalance.target_op_fn = dht_writev2; + + local->op_ret = op_ret; + local->op_errno = op_errno; + + /* We might need to pass the stbuf information to the higher DHT + * layer for appropriate handling. + */ + + dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata); + + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(postbuf)) { + if (!dht_is_tier_xlator(this)) { + if (!local->xattr_req) { + local->xattr_req = dict_new(); + if (!local->xattr_req) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "insufficient memory"); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; } + } + + ret = dict_set_uint32(local->xattr_req, + GF_PROTECT_FROM_EXTERNAL_WRITES, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, + "Failed to set key %s in dictionary", + GF_PROTECT_FROM_EXTERNAL_WRITES); + local->op_errno = ENOMEM; + local->op_ret = -1; goto out; + } } - local->rebalance.target_op_fn = dht_writev2; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - /* We might need to pass the stbuf information to the higher DHT - * layer for appropriate handling. 
- */ - - dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); - - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } - - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { - if (!dht_is_tier_xlator (this)) { - if (!local->xattr_req) { - local->xattr_req = dict_new (); - if (!local->xattr_req) { - gf_msg (this->name, GF_LOG_ERROR, - DHT_MSG_NO_MEMORY, - ENOMEM, "insufficient memory"); - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - - ret = dict_set_uint32 (local->xattr_req, - GF_PROTECT_FROM_EXTERNAL_WRITES, - 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - DHT_MSG_DICT_SET_FAILED, 0, - "Failed to set key %s in dictionary", - GF_PROTECT_FROM_EXTERNAL_WRITES); - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } + dht_iatt_merge(this, &local->stbuf, postbuf); + dht_iatt_merge(this, &local->prebuf, prebuf); - dht_iatt_merge (this, &local->stbuf, postbuf); - dht_iatt_merge (this, &local->prebuf, prebuf); - - ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, - &subvol1, &subvol2); - if (!dht_mig_info_is_invalid (local->cached_subvol, - subvol1, subvol2)) { - if (dht_fd_open_on_dst (this, local->fd, subvol2)) { - dht_writev2 (this, subvol2, frame, 0); - return 0; - } - } - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; + ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1, + &subvol2); + if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) { + if (dht_fd_open_on_dst(this, local->fd, subvol2)) { + dht_writev2(this, subvol2, frame, 0); + return 0; + } } + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } out: - DHT_STRIP_PHASE1_FLAGS (postbuf); - DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STRIP_PHASE1_FLAGS(postbuf); + DHT_STRIP_PHASE1_FLAGS(prebuf); - DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + DHT_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); - return 0; + return 0; } int -dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if ((frame == NULL) || (frame->local == NULL)) - goto out; + if ((frame == NULL) || (frame->local == NULL)) + goto out; - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (writev, frame, local->op_ret, - local->op_errno, &local->rebalance.prebuf, - &local->rebalance.postbuf, - local->rebalance.xdata); - return 0; - } + local = frame->local; + op_errno = local->op_errno; - if (subvol == NULL) - goto out; + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. 
+ */ + DHT_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno, + &local->rebalance.prebuf, &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } - local->call_cnt = 2; /* This is the second attempt */ + if (subvol == NULL) + goto out; - STACK_WIND_COOKIE (frame, dht_writev_cbk, subvol, - subvol, subvol->fops->writev, - local->fd, local->rebalance.vector, - local->rebalance.count, - local->rebalance.offset, local->rebalance.flags, - local->rebalance.iobref, local->xattr_req); + local->call_cnt = 2; /* This is the second attempt */ - return 0; + STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol, + subvol->fops->writev, local->fd, local->rebalance.vector, + local->rebalance.count, local->rebalance.offset, + local->rebalance.flags, local->rebalance.iobref, + local->xattr_req); + + return 0; out: - DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); + DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int -dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int count, off_t off, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE); - if (!local) { - - op_errno = ENOMEM; - goto err; - } - - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - if (xdata) - local->xattr_req = dict_ref (xdata); - - local->rebalance.vector = iov_dup (vector, count); - local->rebalance.offset = off; - local->rebalance.count = count; - local->rebalance.flags = flags; - local->rebalance.iobref = iobref_ref (iobref); - local->call_cnt = 1; - - STACK_WIND_COOKIE (frame, dht_writev_cbk, subvol, subvol, - subvol->fops->writev, fd, - local->rebalance.vector, - local->rebalance.count, - local->rebalance.offset, - local->rebalance.flags, - local->rebalance.iobref, local->xattr_req); - - return 0; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_WRITE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + local->rebalance.vector = iov_dup(vector, count); + local->rebalance.offset = off; + local->rebalance.count = count; + local->rebalance.flags = flags; + local->rebalance.iobref = iobref_ref(iobref); + local->call_cnt = 1; + + STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol, + subvol->fops->writev, fd, local->rebalance.vector, + local->rebalance.count, local->rebalance.offset, + local->rebalance.flags, local->rebalance.iobref, + local->xattr_req); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - - int -dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; - xlator_t *src_subvol = NULL; - xlator_t *dst_subvol = NULL; - inode_t *inode = NULL; - - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - - local = frame->local; - prev = cookie; - - /* Needs to be checked only for ftruncate. - * ftruncate fails with EBADF/EINVAL if dht has not yet opened the fd - * on the cached subvol. This could happen if the file was migrated - * and a lookup updated the cached subvol in the inode ctx. - * We only check once as this could actually be a valid error. - */ + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; + inode_t *inode = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + + local = frame->local; + prev = cookie; + + /* Needs to be checked only for ftruncate. + * ftruncate fails with EBADF/EINVAL if dht has not yet opened the fd + * on the cached subvol. This could happen if the file was migrated + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. + */ + + if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) && + ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) - && ((op_errno == EBADF) || (op_errno == EINVAL)) - && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { + local->op_errno = op_errno; + local->op_ret = -1; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); - if ((op_ret == -1) && !dht_inode_missing(op_errno)) { - local->op_errno = op_errno; - local->op_ret = -1; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); + goto out; + } - goto out; + if (local->call_cnt != 1) { + if (local->stbuf.ia_blocks) { + dht_iatt_merge(this, postbuf, &local->stbuf); + dht_iatt_merge(this, prebuf, &local->prebuf); } + goto out; + } + local->rebalance.target_op_fn = dht_truncate2; - if (local->call_cnt != 1) { - if (local->stbuf.ia_blocks) { - dht_iatt_merge (this, postbuf, &local->stbuf); - dht_iatt_merge (this, prebuf, &local->prebuf); - } - goto out; - } + local->op_ret = op_ret; + local->op_errno = op_errno; - local->rebalance.target_op_fn = dht_truncate2; + /* We might need to pass the stbuf information to the higher DHT + * layer for appropriate handling. + */ - local->op_ret = op_ret; - local->op_errno = op_errno; + dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata); - /* We might need to pass the stbuf information to the higher DHT - * layer for appropriate handling. 
- */ + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } - dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(postbuf)) { + dht_iatt_merge(this, &local->stbuf, postbuf); + dht_iatt_merge(this, &local->prebuf, prebuf); - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } + inode = (local->fd) ? local->fd->inode : local->loc.inode; - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { - dht_iatt_merge (this, &local->stbuf, postbuf); - dht_iatt_merge (this, &local->prebuf, prebuf); - - inode = (local->fd) ? local->fd->inode : local->loc.inode; - - dht_inode_ctx_get_mig_info (this, inode, &src_subvol, - &dst_subvol); - if (!dht_mig_info_is_invalid (local->cached_subvol, - src_subvol, dst_subvol)) { - if ((!local->fd) || ((local->fd) && - dht_fd_open_on_dst (this, local->fd, dst_subvol))) { - dht_truncate2 (this, dst_subvol, frame, 0); - return 0; - } - } - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; + dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol); + if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol, + dst_subvol)) { + if ((!local->fd) || + ((local->fd) && + dht_fd_open_on_dst(this, local->fd, dst_subvol))) { + dht_truncate2(this, dst_subvol, frame, 0); + return 0; + } } + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } out: - DHT_STRIP_PHASE1_FLAGS (postbuf); - DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STRIP_PHASE1_FLAGS(postbuf); + DHT_STRIP_PHASE1_FLAGS(prebuf); - DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + DHT_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); err: - return 0; + return 0; } - int -dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if (!frame || !frame->local) - goto out; + if (!frame || !frame->local) + goto out; - local = frame->local; - op_errno = local->op_errno; + local = frame->local; + op_errno = local->op_errno; - /* This dht xlator is not migrating the file */ - if (we_are_not_migrating (ret)) { - - DHT_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, &local->rebalance.prebuf, - &local->rebalance.postbuf, - local->rebalance.xdata); - return 0; - } + /* This dht xlator is not migrating the file */ + if (we_are_not_migrating(ret)) { + DHT_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, + &local->rebalance.prebuf, &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; /* This is the second attempt */ + local->call_cnt = 2; /* This is the second attempt */ - if (local->fop == GF_FOP_TRUNCATE) { - STACK_WIND_COOKIE (frame, dht_truncate_cbk, subvol, subvol, - subvol->fops->truncate, &local->loc, - local->rebalance.offset, local->xattr_req); - } else { - STACK_WIND_COOKIE (frame, dht_truncate_cbk, subvol, subvol, - subvol->fops->ftruncate, local->fd, - local->rebalance.offset, local->xattr_req); - } + if 
(local->fop == GF_FOP_TRUNCATE) { + STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol, + subvol->fops->truncate, &local->loc, + local->rebalance.offset, local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol, + subvol->fops->ftruncate, local->fd, + local->rebalance.offset, local->xattr_req); + } - return 0; + return 0; out: - DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int -dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) +dht_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - - local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->rebalance.offset = offset; - local->call_cnt = 1; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for gfid=%s", - uuid_utoa (loc->inode->gfid)); - op_errno = EINVAL; - goto err; - } - - if (xdata) - local->xattr_req = dict_ref (xdata); - - STACK_WIND_COOKIE (frame, dht_truncate_cbk, subvol, subvol, - subvol->fops->truncate, loc, offset, xdata); - - return 0; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + + local = dht_local_init(frame, loc, NULL, GF_FOP_TRUNCATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.offset = offset; + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s", + uuid_utoa(loc->inode->gfid)); + op_errno = EINVAL; + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol, + subvol->fops->truncate, loc, offset, xdata); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int -dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) +dht_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->rebalance.offset = offset; - local->call_cnt = 1; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - if (xdata) - local->xattr_req = dict_ref (xdata); - - STACK_WIND_COOKIE (frame, dht_truncate_cbk, subvol, subvol, - subvol->fops->ftruncate, fd, - local->rebalance.offset, local->xattr_req); - return 0; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_FTRUNCATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.offset = offset; + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol, + subvol->fops->ftruncate, fd, local->rebalance.offset, + local->xattr_req); + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - int -dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; - xlator_t *src_subvol = NULL; - xlator_t *dst_subvol = NULL; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - - local = frame->local; - prev = cookie; - - /* fallocate fails with EBADF if dht has not yet opened the fd - * on the cached subvol. This could happen if the file was migrated - * and a lookup updated the cached subvol in the inode ctx. - * We only check once as this could actually be a valid error. 
- */ - - if ((op_ret == -1) && (op_errno == EBADF) - && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } - - if ((op_ret == -1) && !dht_inode_missing(op_errno)) { - local->op_errno = op_errno; - local->op_ret = -1; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - - goto out; - } - - if (local->call_cnt != 1) { - if (local->stbuf.ia_blocks) { - dht_iatt_merge (this, postbuf, &local->stbuf); - dht_iatt_merge (this, prebuf, &local->prebuf); - } - goto out; - } + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + + local = frame->local; + prev = cookie; + + /* fallocate fails with EBADF if dht has not yet opened the fd + * on the cached subvol. This could happen if the file was migrated + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. + */ + + if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - local->op_ret = op_ret; + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { local->op_errno = op_errno; - local->rebalance.target_op_fn = dht_fallocate2; + local->op_ret = -1; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); - dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + goto out; + } - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; + if (local->call_cnt != 1) { + if (local->stbuf.ia_blocks) { + dht_iatt_merge(this, postbuf, &local->stbuf); + dht_iatt_merge(this, prebuf, &local->prebuf); } - - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { - dht_iatt_merge (this, &local->stbuf, postbuf); - dht_iatt_merge (this, &local->prebuf, prebuf); - - dht_inode_ctx_get_mig_info (this, local->fd->inode, &src_subvol, - &dst_subvol); - if (!dht_mig_info_is_invalid (local->cached_subvol, - src_subvol, dst_subvol)) { - if (dht_fd_open_on_dst (this, local->fd, dst_subvol)) { - dht_fallocate2 (this, dst_subvol, frame, 0); - return 0; - } - } - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; + goto out; + } + + local->op_ret = op_ret; + local->op_errno = op_errno; + local->rebalance.target_op_fn = dht_fallocate2; + + dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata); + + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(postbuf)) { + dht_iatt_merge(this, &local->stbuf, postbuf); + dht_iatt_merge(this, &local->prebuf, prebuf); + + dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol, + &dst_subvol); + if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol, + dst_subvol)) { + if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) { + dht_fallocate2(this, dst_subvol, frame, 0); + return 0; + } } + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } out: - DHT_STRIP_PHASE1_FLAGS (postbuf); - 
DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STRIP_PHASE1_FLAGS(postbuf); + DHT_STRIP_PHASE1_FLAGS(prebuf); - DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + DHT_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); err: - return 0; + return 0; } int -dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if (!frame || !frame->local) - goto out; + if (!frame || !frame->local) + goto out; - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (fallocate, frame, local->op_ret, - local->op_errno, - &local->rebalance.prebuf, - &local->rebalance.postbuf, - local->rebalance.xdata); - return 0; - } + local = frame->local; + op_errno = local->op_errno; - if (subvol == NULL) - goto out; + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(fallocate, frame, local->op_ret, local->op_errno, + &local->rebalance.prebuf, &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } - local->call_cnt = 2; /* This is the second attempt */ + if (subvol == NULL) + goto out; - STACK_WIND_COOKIE (frame, dht_fallocate_cbk, subvol, subvol, - subvol->fops->fallocate, local->fd, - local->rebalance.flags, local->rebalance.offset, - local->rebalance.size, local->xattr_req); + local->call_cnt = 2; /* This is the second attempt */ - return 0; + STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol, + subvol->fops->fallocate, local->fd, + local->rebalance.flags, local->rebalance.offset, + local->rebalance.size, local->xattr_req); + + return 0; out: - DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int -dht_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, - off_t offset, size_t len, dict_t *xdata) +dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->rebalance.flags = mode; - local->rebalance.offset = offset; - local->rebalance.size = len; - - local->call_cnt = 1; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - if (xdata) - local->xattr_req = dict_ref (xdata); - - STACK_WIND_COOKIE (frame, dht_fallocate_cbk, subvol, subvol, - subvol->fops->fallocate, fd, - local->rebalance.flags, - local->rebalance.offset, - local->rebalance.size, - local->xattr_req); - - return 0; + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + local = dht_local_init(frame, NULL, fd, 
GF_FOP_FALLOCATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.flags = mode; + local->rebalance.offset = offset; + local->rebalance.size = len; + + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol, + subvol->fops->fallocate, fd, local->rebalance.flags, + local->rebalance.offset, local->rebalance.size, + local->xattr_req); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - int -dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; - xlator_t *src_subvol = NULL; - xlator_t *dst_subvol = NULL; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - - local = frame->local; - prev = cookie; - - - /* discard fails with EBADF if dht has not yet opened the fd - * on the cached subvol. This could happen if the file was migrated - * and a lookup updated the cached subvol in the inode ctx. - * We only check once as this could actually be a valid error. - */ - if ((op_ret == -1) && (op_errno == EBADF) - && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } - - if ((op_ret == -1) && !dht_inode_missing(op_errno)) { - local->op_errno = op_errno; - local->op_ret = -1; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - - goto out; - } - - if (local->call_cnt != 1) { - if (local->stbuf.ia_blocks) { - dht_iatt_merge (this, postbuf, &local->stbuf); - dht_iatt_merge (this, prebuf, &local->prebuf); - } - goto out; - } + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + + local = frame->local; + prev = cookie; + + /* discard fails with EBADF if dht has not yet opened the fd + * on the cached subvol. This could happen if the file was migrated + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. 
+ */ + if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - local->rebalance.target_op_fn = dht_discard2; - local->op_ret = op_ret; + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { local->op_errno = op_errno; + local->op_ret = -1; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); - dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + goto out; + } - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; + if (local->call_cnt != 1) { + if (local->stbuf.ia_blocks) { + dht_iatt_merge(this, postbuf, &local->stbuf); + dht_iatt_merge(this, prebuf, &local->prebuf); } - - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { - dht_iatt_merge (this, &local->stbuf, postbuf); - dht_iatt_merge (this, &local->prebuf, prebuf); - - dht_inode_ctx_get_mig_info (this, local->fd->inode, &src_subvol, - &dst_subvol); - if (!dht_mig_info_is_invalid(local->cached_subvol, - src_subvol, dst_subvol)) { - if (dht_fd_open_on_dst (this, local->fd, dst_subvol)) { - dht_discard2 (this, dst_subvol, frame, 0); - return 0; - } - } - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; + goto out; + } + + local->rebalance.target_op_fn = dht_discard2; + local->op_ret = op_ret; + local->op_errno = op_errno; + + dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata); + + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(postbuf)) { + dht_iatt_merge(this, &local->stbuf, postbuf); + dht_iatt_merge(this, &local->prebuf, prebuf); + + dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol, + &dst_subvol); + if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol, + dst_subvol)) { + if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) { + dht_discard2(this, dst_subvol, frame, 0); + return 0; + } } + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } out: - DHT_STRIP_PHASE1_FLAGS (postbuf); - DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STRIP_PHASE1_FLAGS(postbuf); + DHT_STRIP_PHASE1_FLAGS(prebuf); - DHT_STACK_UNWIND (discard, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + DHT_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata); err: - return 0; + return 0; } int -dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if (!frame || !frame->local) - goto out; + if (!frame || !frame->local) + goto out; - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. 
- */ - DHT_STACK_UNWIND (discard, frame, local->op_ret, - local->op_errno, - &local->rebalance.prebuf, - &local->rebalance.postbuf, - local->rebalance.xdata); - return 0; - } + local = frame->local; + op_errno = local->op_errno; - if (subvol == NULL) - goto out; + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(discard, frame, local->op_ret, local->op_errno, + &local->rebalance.prebuf, &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } - local->call_cnt = 2; /* This is the second attempt */ + if (subvol == NULL) + goto out; - STACK_WIND_COOKIE (frame, dht_discard_cbk, subvol, subvol, - subvol->fops->discard, local->fd, - local->rebalance.offset, local->rebalance.size, - local->xattr_req); + local->call_cnt = 2; /* This is the second attempt */ - return 0; + STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol, + subvol->fops->discard, local->fd, local->rebalance.offset, + local->rebalance.size, local->xattr_req); + + return 0; out: - DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int -dht_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) +dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD); - if (!local) { - op_errno = ENOMEM; - goto err; - } + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - local->rebalance.offset = offset; - local->rebalance.size = len; + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - local->call_cnt = 1; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + local = dht_local_init(frame, NULL, fd, GF_FOP_DISCARD); + if (!local) { + op_errno = ENOMEM; + goto err; + } - if (xdata) - local->xattr_req = dict_ref (xdata); + local->rebalance.offset = offset; + local->rebalance.size = len; - STACK_WIND_COOKIE (frame, dht_discard_cbk, subvol, subvol, - subvol->fops->discard, fd, - local->rebalance.offset, - local->rebalance.size, - local->xattr_req); + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } - return 0; + if (xdata) + local->xattr_req = dict_ref(xdata); + + STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol, + subvol->fops->discard, fd, local->rebalance.offset, + local->rebalance.size, local->xattr_req); + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int -dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; - xlator_t *subvol1 = NULL, *subvol2 = NULL; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - - local = frame->local; - prev = cookie; - - /* zerofill fails with EBADF if dht has not yet opened the fd - * on the cached subvol. This could happen if the file was migrated - * and a lookup updated the cached subvol in the inode ctx. - * We only check once as this could actually be a valid error. - */ - if ((op_ret == -1) && (op_errno == EBADF) - && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } - - if ((op_ret == -1) && !dht_inode_missing(op_errno)) { - local->op_errno = op_errno; - local->op_ret = -1; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto out; - } - - if (local->call_cnt != 1) { - if (local->stbuf.ia_blocks) { - dht_iatt_merge (this, postbuf, &local->stbuf); - dht_iatt_merge (this, prebuf, &local->prebuf); - } - goto out; - } + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; + xlator_t *subvol1 = NULL, *subvol2 = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + GF_VALIDATE_OR_GOTO("dht", cookie, out); + + local = frame->local; + prev = cookie; + + /* zerofill fails with EBADF if dht has not yet opened the fd + * on the cached subvol. This could happen if the file was migrated + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. 
+ */ + if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - local->rebalance.target_op_fn = dht_zerofill2; - local->op_ret = op_ret; + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { local->op_errno = op_errno; - - dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); - - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; + local->op_ret = -1; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto out; + } + + if (local->call_cnt != 1) { + if (local->stbuf.ia_blocks) { + dht_iatt_merge(this, postbuf, &local->stbuf); + dht_iatt_merge(this, prebuf, &local->prebuf); } - - /* Check if the rebalance phase1 is true */ - if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { - dht_iatt_merge (this, &local->stbuf, postbuf); - dht_iatt_merge (this, &local->prebuf, prebuf); - - ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, - &subvol1, &subvol2); - if (!dht_mig_info_is_invalid (local->cached_subvol, - subvol1, subvol2)) { - if (dht_fd_open_on_dst (this, local->fd, subvol2)) { - dht_zerofill2 (this, subvol2, frame, 0); - return 0; - } - } - - ret = dht_rebalance_in_progress_check (this, frame); - if (!ret) - return 0; + goto out; + } + + local->rebalance.target_op_fn = dht_zerofill2; + local->op_ret = op_ret; + local->op_errno = op_errno; + + dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata); + + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) { + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1(postbuf)) { + dht_iatt_merge(this, &local->stbuf, postbuf); + dht_iatt_merge(this, &local->prebuf, prebuf); + + ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1, + &subvol2); + if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) { + if (dht_fd_open_on_dst(this, local->fd, subvol2)) { + dht_zerofill2(this, subvol2, frame, 0); + return 0; + } } + ret = dht_rebalance_in_progress_check(this, frame); + if (!ret) + return 0; + } + out: - DHT_STRIP_PHASE1_FLAGS (postbuf); - DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STRIP_PHASE1_FLAGS(postbuf); + DHT_STRIP_PHASE1_FLAGS(prebuf); - DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + DHT_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata); err: - return 0; + return 0; } int -dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if (!frame || !frame->local) - goto out; + if (!frame || !frame->local) + goto out; - local = frame->local; + local = frame->local; - op_errno = local->op_errno; + op_errno = local->op_errno; - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (zerofill, frame, local->op_ret, - local->op_errno, - &local->rebalance.prebuf, - &local->rebalance.postbuf, - local->rebalance.xdata); + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. 
Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(zerofill, frame, local->op_ret, local->op_errno, + &local->rebalance.prebuf, &local->rebalance.postbuf, + local->rebalance.xdata); - return 0; - } + return 0; + } - if (subvol == NULL) - goto out; + if (subvol == NULL) + goto out; - local->call_cnt = 2; /* This is the second attempt */ + local->call_cnt = 2; /* This is the second attempt */ - STACK_WIND_COOKIE (frame, dht_zerofill_cbk, subvol, subvol, - subvol->fops->zerofill, - local->fd, local->rebalance.offset, - local->rebalance.size, local->xattr_req); + STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol, + subvol->fops->zerofill, local->fd, + local->rebalance.offset, local->rebalance.size, + local->xattr_req); - return 0; + return 0; out: - DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int -dht_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) +dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL); - if (!local) { - op_errno = ENOMEM; - goto err; - } + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; - local->rebalance.offset = offset; - local->rebalance.size = len; + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - local->call_cnt = 1; - subvol = local->cached_subvol; - if (!subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + local = dht_local_init(frame, NULL, fd, GF_FOP_ZEROFILL); + if (!local) { + op_errno = ENOMEM; + goto err; + } - if (xdata) - local->xattr_req = dict_ref (xdata); + local->rebalance.offset = offset; + local->rebalance.size = len; - STACK_WIND_COOKIE (frame, dht_zerofill_cbk, subvol, subvol, - subvol->fops->zerofill, fd, - local->rebalance.offset, - local->rebalance.size, local->xattr_req); + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } - return 0; + if (xdata) + local->xattr_req = dict_ref(xdata); -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); + STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol, + subvol->fops->zerofill, fd, local->rebalance.offset, + local->rebalance.size, local->xattr_req); - return 0; -} + return 0; +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} /* handle cases of migration here for 'setattr()' calls */ int -dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int ret = -1; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int ret = -1; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; - local->op_errno = op_errno; - - if ((local->fop == GF_FOP_FSETATTR) && - (op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { - ret = dht_check_and_open_fd_on_subvol (this, frame); - if (ret) - goto out; - return 0; - } - - if ((op_ret == -1) && !dht_inode_missing(op_errno)) { - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto out; - } + local->op_errno = op_errno; - if (local->call_cnt != 1) - goto out; + if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) && + (op_errno == EBADF) && !(local->fd_checked)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; + return 0; + } - local->op_ret = op_ret; - local->op_errno = op_errno; + if ((op_ret == -1) && !dht_inode_missing(op_errno)) { + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto out; + } - local->rebalance.target_op_fn = dht_setattr2; + if (local->call_cnt != 1) + goto out; + local->op_ret = op_ret; + local->op_errno = op_errno; - /* Phase 2 of migration */ - if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { + local->rebalance.target_op_fn = dht_setattr2; - dht_set_local_rebalance (this, local, NULL, prebuf, - postbuf, xdata); + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) { + dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata); - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } + ret = dht_rebalance_complete_check(this, frame); + if (!ret) + return 0; + } - /* At the end of the migration process, whatever 'attr' we - have on source file will be migrated to destination file - in one shot, hence we don't need to check for in progress - state here (ie, PHASE1) */ + /* At the end of the migration process, whatever 'attr' we + have on source file will be migrated to destination file + in one shot, hence we don't need to check for in progress + state here (ie, PHASE1) */ out: - DHT_STRIP_PHASE1_FLAGS (postbuf); - DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STRIP_PHASE1_FLAGS(postbuf); + DHT_STRIP_PHASE1_FLAGS(prebuf); - DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + DHT_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata); - return 0; + return 0; } int -dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) +dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { - dht_local_t *local = NULL; - int32_t op_errno = EINVAL; + dht_local_t *local = NULL; + int32_t op_errno = EINVAL; - if (!frame || !frame->local) - goto out; + if (!frame || !frame->local) + goto out; - local = frame->local; - op_errno = local->op_errno; - - if (we_are_not_migrating (ret)) { - /* This dht xlator is not migrating the file. 
Unwind and - * pass on the original mode bits so the higher DHT layer - * can handle this. - */ - DHT_STACK_UNWIND (setattr, frame, local->op_ret, - local->op_errno, - &local->rebalance.prebuf, - &local->rebalance.postbuf, - local->rebalance.xdata); - return 0; - } + local = frame->local; + op_errno = local->op_errno; - if (subvol == NULL) - goto out; + if (we_are_not_migrating(ret)) { + /* This dht xlator is not migrating the file. Unwind and + * pass on the original mode bits so the higher DHT layer + * can handle this. + */ + DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, + &local->rebalance.prebuf, &local->rebalance.postbuf, + local->rebalance.xdata); + return 0; + } - local->call_cnt = 2; /* This is the second attempt */ - - if (local->fop == GF_FOP_SETATTR) { - STACK_WIND_COOKIE (frame, dht_file_setattr_cbk, subvol, - subvol, subvol->fops->setattr, &local->loc, - &local->rebalance.stbuf, local->rebalance.flags, - local->xattr_req); - } else { - STACK_WIND_COOKIE (frame, dht_file_setattr_cbk, subvol, - subvol, subvol->fops->fsetattr, local->fd, - &local->rebalance.stbuf, local->rebalance.flags, - local->xattr_req); - } + if (subvol == NULL) + goto out; - return 0; + local->call_cnt = 2; /* This is the second attempt */ + + if (local->fop == GF_FOP_SETATTR) { + STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol, + subvol->fops->setattr, &local->loc, + &local->rebalance.stbuf, local->rebalance.flags, + local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol, + subvol->fops->fsetattr, local->fd, + &local->rebalance.stbuf, local->rebalance.flags, + local->xattr_req); + } + + return 0; out: - DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - /* Keep the existing code same for all the cases other than regular file */ int -dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto unlock; - } + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; - dht_iatt_merge (this, &local->prebuf, statpre); - dht_iatt_merge (this, &local->stbuf, statpost); + local = frame->local; + prev = cookie; - local->op_ret = 0; - local->op_errno = 0; + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto unlock; } + + dht_iatt_merge(this, &local->prebuf, statpre); + dht_iatt_merge(this, &local->stbuf, statpost); + + local->op_ret = 0; + local->op_errno = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - if (local->op_ret == 0) - dht_inode_ctx_time_set (local->loc.inode, this, - &local->stbuf); - DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->stbuf, xdata); - } + this_call_cnt = dht_frame_return(frame); + if 
(is_last_call(this_call_cnt)) { + if (local->op_ret == 0) + dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf); + DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->stbuf, xdata); + } - return 0; + return 0; } - /* Keep the existing code same for all the cases other than regular file */ int -dht_non_mds_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +dht_non_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; - LOCK (&frame->lock); - { - if (op_ret == -1) { - gf_msg (this->name, op_errno, 0, - 0, "subvolume %s returned -1", - prev->name); + LOCK(&frame->lock); + { + if (op_ret == -1) { + gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1", + prev->name); - goto unlock; - } + goto unlock; + } - dht_iatt_merge (this, &local->prebuf, statpre); - dht_iatt_merge (this, &local->stbuf, statpost); + dht_iatt_merge(this, &local->prebuf, statpre); + dht_iatt_merge(this, &local->stbuf, statpost); - local->op_ret = 0; - local->op_errno = 0; - } + local->op_ret = 0; + local->op_errno = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - dht_inode_ctx_time_set (local->loc.inode, this, &local->stbuf); - DHT_STACK_UNWIND (setattr, frame, 0, 0, - &local->prebuf, &local->stbuf, xdata); - } + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf); + DHT_STACK_UNWIND(setattr, frame, 0, 0, &local->prebuf, &local->stbuf, + xdata); + } - return 0; + return 0; } - - - - int -dht_mds_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +dht_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *prev = NULL; - xlator_t *mds_subvol = NULL; - struct iatt loc_stbuf = {0,}; - int i = 0; - - local = frame->local; - prev = cookie; - conf = this->private; - mds_subvol = local->mds_subvol; - - if (op_ret == -1) { - local->op_ret = op_ret; - local->op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "subvolume %s returned -1", - prev->name); - goto out; - } - - local->op_ret = 0; - loc_stbuf = local->stbuf; - dht_iatt_merge (this, &local->prebuf, statpre); - dht_iatt_merge (this, &local->stbuf, statpost); - - local->call_cnt = conf->subvolume_cnt - 1; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (mds_subvol == conf->subvolumes[i]) - continue; - STACK_WIND_COOKIE (frame, dht_non_mds_setattr_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->setattr, - &local->loc, &loc_stbuf, - local->valid, local->xattr_req); - } - - return 0; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *prev = NULL; + xlator_t *mds_subvol = NULL; + struct iatt loc_stbuf = { + 0, + }; + int i = 0; + + local = frame->local; + prev = cookie; 
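    /*
     * Flow of this callback, as reconstructed from the code below: it runs on
     * the reply from the MDS (metadata) subvolume.  On failure it records
     * op_ret/op_errno and unwinds; on success it merges the pre/post iatts,
     * then re-winds the same setattr (using the cached local->stbuf and
     * local->valid) to every remaining subvolume with
     * call_cnt = subvolume_cnt - 1, collecting those replies in
     * dht_non_mds_setattr_cbk above.
     */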
+ conf = this->private; + mds_subvol = local->mds_subvol; + + if (op_ret == -1) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg_debug(this->name, op_errno, "subvolume %s returned -1", + prev->name); + goto out; + } + + local->op_ret = 0; + loc_stbuf = local->stbuf; + dht_iatt_merge(this, &local->prebuf, statpre); + dht_iatt_merge(this, &local->stbuf, statpost); + + local->call_cnt = conf->subvolume_cnt - 1; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (mds_subvol == conf->subvolumes[i]) + continue; + STACK_WIND_COOKIE(frame, dht_non_mds_setattr_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->setattr, &local->loc, + &loc_stbuf, local->valid, local->xattr_req); + } + + return 0; out: - DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->stbuf, xdata); + DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->stbuf, xdata); - return 0; + return 0; } int -dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +dht_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) { - xlator_t *subvol = NULL; - xlator_t *mds_subvol = NULL; - dht_layout_t *layout = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int i = -1; - int ret = -1; - int call_cnt = 0; - dht_conf_t *conf = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - conf = this->private; - local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - layout = local->layout; - if (!layout) { - gf_msg_debug (this->name, 0, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - if (!layout_is_sane (layout)) { - gf_msg_debug (this->name, 0, - "layout is not sane for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); - - if (IA_ISREG (loc->inode->ia_type)) { - /* in the regular file _cbk(), we need to check for - migration possibilities */ - local->rebalance.stbuf = *stbuf; - local->rebalance.flags = valid; - local->call_cnt = 1; - subvol = local->cached_subvol; + xlator_t *subvol = NULL; + xlator_t *mds_subvol = NULL; + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + int ret = -1; + int call_cnt = 0; + dht_conf_t *conf = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + + conf = this->private; + local = dht_local_init(frame, loc, NULL, GF_FOP_SETATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane(layout)) { + gf_msg_debug(this->name, 0, "layout is not sane for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); + + if (IA_ISREG(loc->inode->ia_type)) { + /* in the regular file _cbk(), we need to check for + migration possibilities */ + local->rebalance.stbuf = *stbuf; + local->rebalance.flags = valid; + local->call_cnt = 1; + subvol = local->cached_subvol; - STACK_WIND_COOKIE (frame, dht_file_setattr_cbk, subvol, - subvol, 
subvol->fops->setattr, loc, stbuf, - valid, xdata); + STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol, + subvol->fops->setattr, loc, stbuf, valid, xdata); - return 0; + return 0; + } + + local->call_cnt = call_cnt = layout->cnt; + + if (IA_ISDIR(loc->inode->ia_type) && !__is_root_gfid(loc->inode->gfid) && + call_cnt != 1) { + ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol); + if (ret || !mds_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get mds subvol for path %s", local->loc.path); + op_errno = EINVAL; + goto err; } - local->call_cnt = call_cnt = layout->cnt; - - if (IA_ISDIR (loc->inode->ia_type) && - !__is_root_gfid (loc->inode->gfid) && call_cnt != 1) { - ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol); - if (ret || !mds_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get mds subvol for path %s", - local->loc.path); - op_errno = EINVAL; - goto err; - } - - local->mds_subvol = mds_subvol; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == mds_subvol) { - if (!conf->subvolume_status[i]) { - gf_msg (this->name, GF_LOG_WARNING, - layout->list[i].err, - DHT_MSG_HASHED_SUBVOL_DOWN, - "MDS subvol is down for path " - " %s Unable to set attr " , - local->loc.path); - op_errno = ENOTCONN; - goto err; - } - } - } - local->valid = valid; - local->stbuf = *stbuf; - - STACK_WIND_COOKIE (frame, dht_mds_setattr_cbk, - local->mds_subvol, - local->mds_subvol, - local->mds_subvol->fops->setattr, - loc, stbuf, valid, xdata); - return 0; - } else { - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_setattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator, - layout->list[i].xlator->fops->setattr, - loc, stbuf, valid, xdata); + local->mds_subvol = mds_subvol; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == mds_subvol) { + if (!conf->subvolume_status[i]) { + gf_msg(this->name, GF_LOG_WARNING, layout->list[i].err, + DHT_MSG_HASHED_SUBVOL_DOWN, + "MDS subvol is down for path " + " %s Unable to set attr ", + local->loc.path); + op_errno = ENOTCONN; + goto err; } + } } + local->valid = valid; + local->stbuf = *stbuf; + STACK_WIND_COOKIE(frame, dht_mds_setattr_cbk, local->mds_subvol, + local->mds_subvol, local->mds_subvol->fops->setattr, + loc, stbuf, valid, xdata); return 0; + } else { + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator, + layout->list[i].xlator, + layout->list[i].xlator->fops->setattr, loc, stbuf, + valid, xdata); + } + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - int -dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, - int32_t valid, dict_t *xdata) +dht_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) { - xlator_t *subvol = NULL; - dht_layout_t *layout = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int i = -1; - int call_cnt = 0; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - layout = local->layout; - if (!layout) { - gf_msg_debug (this->name, 0, - "no layout for fd=%p", fd); - op_errno = EINVAL; - goto err; - } + xlator_t *subvol = NULL; + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + int call_cnt = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + local = dht_local_init(frame, NULL, fd, GF_FOP_FSETATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane(layout)) { + gf_msg_debug(this->name, 0, "layout is not sane for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + if (xdata) + local->xattr_req = dict_ref(xdata); + + if (IA_ISREG(fd->inode->ia_type)) { + /* in the regular file _cbk(), we need to check for + migration possibilities */ + local->rebalance.stbuf = *stbuf; + local->rebalance.flags = valid; + local->call_cnt = 1; + subvol = local->cached_subvol; - if (!layout_is_sane (layout)) { - gf_msg_debug (this->name, 0, - "layout is not sane for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - if (xdata) - local->xattr_req = dict_ref (xdata); - - if (IA_ISREG (fd->inode->ia_type)) { - /* in the regular file _cbk(), we need to check for - migration possibilities */ - local->rebalance.stbuf = *stbuf; - local->rebalance.flags = valid; - local->call_cnt = 1; - subvol = local->cached_subvol; - - STACK_WIND_COOKIE (frame, dht_file_setattr_cbk, subvol, - subvol, subvol->fops->fsetattr, fd, - &local->rebalance.stbuf, - local->rebalance.flags, - local->xattr_req); - return 0; - } + STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol, + subvol->fops->fsetattr, fd, &local->rebalance.stbuf, + local->rebalance.flags, local->xattr_req); + return 0; + } - local->call_cnt = call_cnt = layout->cnt; + local->call_cnt = call_cnt = layout->cnt; - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_setattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator, - layout->list[i].xlator->fops->fsetattr, - fd, stbuf, valid, xdata); - } + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator, + layout->list[i].xlator, + layout->list[i].xlator->fops->fsetattr, fd, stbuf, + valid, xdata); + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 97b98e01451..43746bc63b9 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "glusterfs.h" #include "xlator.h" #include "dht-common.h" @@ -16,862 +15,814 @@ #include "dht-messages.h" #include "unittest/unittest.h" +#define layout_base_size (sizeof(dht_layout_t)) -#define layout_base_size (sizeof (dht_layout_t)) - -#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0]) +#define layout_entry_size (sizeof((dht_layout_t *)NULL)->list[0]) #define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size)) dht_layout_t * -dht_layout_new (xlator_t *this, int cnt) +dht_layout_new(xlator_t *this, int cnt) { - dht_layout_t *layout = NULL; - dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; - REQUIRE(NULL != this); - REQUIRE(cnt >= 0); + REQUIRE(NULL != this); + REQUIRE(cnt >= 0); - conf = this->private; + conf = this->private; - layout = GF_CALLOC (1, layout_size (cnt), - gf_dht_mt_dht_layout_t); - if (!layout) { - goto out; - } + layout = GF_CALLOC(1, layout_size(cnt), gf_dht_mt_dht_layout_t); + if (!layout) { + goto out; + } - layout->type = DHT_HASH_TYPE_DM; - layout->cnt = cnt; + layout->type = DHT_HASH_TYPE_DM; + layout->cnt = cnt; - if (conf) { - layout->spread_cnt = conf->dir_spread_cnt; - layout->gen = conf->gen; - } + if (conf) { + layout->spread_cnt = conf->dir_spread_cnt; + layout->gen = conf->gen; + } - GF_ATOMIC_INIT (layout->ref, 1); + GF_ATOMIC_INIT(layout->ref, 1); - ENSURE(NULL != layout); - ENSURE(layout->type == DHT_HASH_TYPE_DM); - ENSURE(layout->cnt == cnt); - ENSURE(GF_ATOMIC_GET (layout->ref) == 1); + ENSURE(NULL != layout); + ENSURE(layout->type == DHT_HASH_TYPE_DM); + ENSURE(layout->cnt == cnt); + ENSURE(GF_ATOMIC_GET(layout->ref) == 1); out: - return layout; + return layout; } - dht_layout_t * -dht_layout_get (xlator_t *this, inode_t *inode) +dht_layout_get(xlator_t *this, inode_t *inode) { - dht_layout_t *layout = NULL; - int ret = 0; - - ret = dht_inode_ctx_layout_get (inode, this, &layout); - if ((!ret) && layout) { - GF_ATOMIC_INC (layout->ref); - } - return layout; + dht_layout_t *layout = NULL; + int ret = 0; + + ret = dht_inode_ctx_layout_get(inode, this, &layout); + if ((!ret) && layout) { + GF_ATOMIC_INC(layout->ref); + } + return layout; } - int -dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout) +dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout) { - dht_conf_t *conf = NULL; - int oldret = -1; - int ret = -1; - dht_layout_t *old_layout; - - conf = this->private; - if (!conf || !layout) - goto out; - - LOCK (&conf->layout_lock); - { - oldret = dht_inode_ctx_layout_get (inode, this, &old_layout); - if (layout) - GF_ATOMIC_INC (layout->ref); - ret = dht_inode_ctx_layout_set (inode, this, layout); - } - UNLOCK (&conf->layout_lock); - - if (!oldret) { - dht_layout_unref (this, old_layout); - } - if (ret) - GF_ATOMIC_DEC (layout->ref); + dht_conf_t *conf = NULL; + int oldret = -1; + int ret = -1; + dht_layout_t *old_layout; + + conf = this->private; + if (!conf || !layout) + goto out; + + LOCK(&conf->layout_lock); + { + oldret = dht_inode_ctx_layout_get(inode, this, &old_layout); + if (layout) + GF_ATOMIC_INC(layout->ref); + ret = 
dht_inode_ctx_layout_set(inode, this, layout); + } + UNLOCK(&conf->layout_lock); + + if (!oldret) { + dht_layout_unref(this, old_layout); + } + if (ret) + GF_ATOMIC_DEC(layout->ref); out: - return ret; + return ret; } - void -dht_layout_unref (xlator_t *this, dht_layout_t *layout) +dht_layout_unref(xlator_t *this, dht_layout_t *layout) { - int ref = 0; + int ref = 0; - if (!layout || layout->preset || !this->private) - return; + if (!layout || layout->preset || !this->private) + return; - ref = GF_ATOMIC_DEC (layout->ref); + ref = GF_ATOMIC_DEC(layout->ref); - if (!ref) - GF_FREE (layout); + if (!ref) + GF_FREE(layout); } - dht_layout_t * -dht_layout_ref (xlator_t *this, dht_layout_t *layout) +dht_layout_ref(xlator_t *this, dht_layout_t *layout) { - if (layout->preset || !this->private) - return layout; + if (layout->preset || !this->private) + return layout; - GF_ATOMIC_INC (layout->ref); + GF_ATOMIC_INC(layout->ref); - return layout; + return layout; } - xlator_t * -dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) +dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name) { - uint32_t hash = 0; - xlator_t *subvol = NULL; - int i = 0; - int ret = 0; - - ret = dht_hash_compute (this, layout->type, name, &hash); - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_COMPUTE_HASH_FAILED, - "hash computation failed for type=%d name=%s", - layout->type, name); - goto out; - } - - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].start <= hash - && layout->list[i].stop >= hash) { - subvol = layout->list[i].xlator; - break; - } - } - - if (!subvol) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "no subvolume for hash (value) = %u", hash); - } + uint32_t hash = 0; + xlator_t *subvol = NULL; + int i = 0; + int ret = 0; + + ret = dht_hash_compute(this, layout->type, name, &hash); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED, + "hash computation failed for type=%d name=%s", layout->type, + name); + goto out; + } + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].start <= hash && layout->list[i].stop >= hash) { + subvol = layout->list[i].xlator; + break; + } + } + + if (!subvol) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "no subvolume for hash (value) = %u", hash); + } out: - return subvol; + return subvol; } - dht_layout_t * -dht_layout_for_subvol (xlator_t *this, xlator_t *subvol) +dht_layout_for_subvol(xlator_t *this, xlator_t *subvol) { - dht_conf_t *conf = NULL; - dht_layout_t *layout = NULL; - int i = 0; - - conf = this->private; - if (!conf) - goto out; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == subvol) { - layout = conf->file_layouts[i]; - break; - } + dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + conf = this->private; + if (!conf) + goto out; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == subvol) { + layout = conf->file_layouts[i]; + break; } + } out: - return layout; + return layout; } - int -dht_layouts_init (xlator_t *this, dht_conf_t *conf) +dht_layouts_init(xlator_t *this, dht_conf_t *conf) { - dht_layout_t *layout = NULL; - int i = 0; - int ret = -1; - - if (!conf) - goto out; - - conf->file_layouts = GF_CALLOC (conf->subvolume_cnt, - sizeof (dht_layout_t *), - gf_dht_mt_dht_layout_t); - if (!conf->file_layouts) { - goto out; - } + dht_layout_t *layout = NULL; + int i = 0; + int ret = -1; - for (i = 0; i < 
conf->subvolume_cnt; i++) { - layout = dht_layout_new (this, 1); + if (!conf) + goto out; - if (!layout) { - goto out; - } - - layout->preset = 1; + conf->file_layouts = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_layout_t *), + gf_dht_mt_dht_layout_t); + if (!conf->file_layouts) { + goto out; + } - layout->list[0].xlator = conf->subvolumes[i]; + for (i = 0; i < conf->subvolume_cnt; i++) { + layout = dht_layout_new(this, 1); - conf->file_layouts[i] = layout; + if (!layout) { + goto out; } - ret = 0; + layout->preset = 1; + + layout->list[0].xlator = conf->subvolumes[i]; + + conf->file_layouts[i] = layout; + } + + ret = 0; out: - return ret; + return ret; } - int -dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, - int pos, int32_t **disk_layout_p) +dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos, + int32_t **disk_layout_p) { - int ret = -1; - int32_t *disk_layout = NULL; + int ret = -1; + int32_t *disk_layout = NULL; - disk_layout = GF_CALLOC (5, sizeof (int), - gf_dht_mt_int32_t); - if (!disk_layout) { - goto out; - } + disk_layout = GF_CALLOC(5, sizeof(int), gf_dht_mt_int32_t); + if (!disk_layout) { + goto out; + } - disk_layout[0] = hton32 (layout->list[pos].commit_hash); - disk_layout[1] = hton32 (layout->type); - disk_layout[2] = hton32 (layout->list[pos].start); - disk_layout[3] = hton32 (layout->list[pos].stop); + disk_layout[0] = hton32(layout->list[pos].commit_hash); + disk_layout[1] = hton32(layout->type); + disk_layout[2] = hton32(layout->list[pos].start); + disk_layout[3] = hton32(layout->list[pos].stop); - if (disk_layout_p) - *disk_layout_p = disk_layout; - else - GF_FREE (disk_layout); + if (disk_layout_p) + *disk_layout_p = disk_layout; + else + GF_FREE(disk_layout); - ret = 0; + ret = 0; out: - return ret; + return ret; } int -dht_disk_layout_extract_for_subvol (xlator_t *this, dht_layout_t *layout, - xlator_t *subvol, int32_t **disk_layout_p) +dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout, + xlator_t *subvol, int32_t **disk_layout_p) { - int i = 0; + int i = 0; - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].xlator == subvol) - break; - } + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) + break; + } - if (i == layout->cnt) - return -1; + if (i == layout->cnt) + return -1; - return dht_disk_layout_extract (this, layout, i, disk_layout_p); + return dht_disk_layout_extract(this, layout, i, disk_layout_p); } int -dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, - int pos, void *disk_layout_raw, int disk_layout_len) +dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos, + void *disk_layout_raw, int disk_layout_len) { - int type = 0; - int start_off = 0; - int stop_off = 0; - int commit_hash = 0; - int disk_layout[4]; + int type = 0; + int start_off = 0; + int stop_off = 0; + int commit_hash = 0; + int disk_layout[4]; - if (!disk_layout_raw) { - gf_msg (this->name, GF_LOG_CRITICAL, 0, - DHT_MSG_LAYOUT_MERGE_FAILED, - "error no layout on disk for merge"); - return -1; - } + if (!disk_layout_raw) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED, + "error no layout on disk for merge"); + return -1; + } - GF_ASSERT (disk_layout_len == sizeof (disk_layout)); + GF_ASSERT(disk_layout_len == sizeof(disk_layout)); - memcpy (disk_layout, disk_layout_raw, disk_layout_len); + memcpy(disk_layout, disk_layout_raw, disk_layout_len); - type = ntoh32 (disk_layout[1]); - switch (type) { + type = ntoh32(disk_layout[1]); + switch (type) { case 
DHT_HASH_TYPE_DM_USER: - gf_msg_debug (this->name, 0, "found user-set layout"); - layout->type = type; - /* Fall through. */ - case DHT_HASH_TYPE_DM: - break; + gf_msg_debug(this->name, 0, "found user-set layout"); + layout->type = type; + /* Fall through. */ + case DHT_HASH_TYPE_DM: + break; default: - gf_msg (this->name, GF_LOG_CRITICAL, 0, - DHT_MSG_INVALID_DISK_LAYOUT, - "Invalid disk layout: " - "Catastrophic error layout with unknown type found %d", - disk_layout[1]); - return -1; - } - - commit_hash = ntoh32 (disk_layout[0]); - start_off = ntoh32 (disk_layout[2]); - stop_off = ntoh32 (disk_layout[3]); - - layout->list[pos].commit_hash = commit_hash; - layout->list[pos].start = start_off; - layout->list[pos].stop = stop_off; - - gf_msg_trace (this->name, 0, - "merged to layout: %u - %u (type %d, hash %d) from %s", - start_off, stop_off, commit_hash, type, - layout->list[pos].xlator->name); - - return 0; + gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT, + "Invalid disk layout: " + "Catastrophic error layout with unknown type found %d", + disk_layout[1]); + return -1; + } + + commit_hash = ntoh32(disk_layout[0]); + start_off = ntoh32(disk_layout[2]); + stop_off = ntoh32(disk_layout[3]); + + layout->list[pos].commit_hash = commit_hash; + layout->list[pos].start = start_off; + layout->list[pos].stop = stop_off; + + gf_msg_trace( + this->name, 0, "merged to layout: %u - %u (type %d, hash %d) from %s", + start_off, stop_off, commit_hash, type, layout->list[pos].xlator->name); + + return 0; } int -dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, - int op_ret, int op_errno, dict_t *xattr) +dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + int op_ret, int op_errno, dict_t *xattr) { - int i = 0; - int ret = -1; - int err = -1; - void *disk_layout_raw = NULL; - int disk_layout_len = 0; - dht_conf_t *conf = this->private; - - if (op_ret != 0) { - err = op_errno; - } + int i = 0; + int ret = -1; + int err = -1; + void *disk_layout_raw = NULL; + int disk_layout_len = 0; + dht_conf_t *conf = this->private; - if (!layout) - goto out; + if (op_ret != 0) { + err = op_errno; + } - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].xlator == NULL) { - layout->list[i].err = err; - layout->list[i].xlator = subvol; - break; - } - } + if (!layout) + goto out; - if (op_ret != 0) { - ret = 0; - goto out; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == NULL) { + layout->list[i].err = err; + layout->list[i].xlator = subvol; + break; } + } - if (xattr) { - /* during lookup and not mkdir */ - ret = dict_get_ptr_and_len (xattr, conf->xattr_name, - &disk_layout_raw, &disk_layout_len); - } + if (op_ret != 0) { + ret = 0; + goto out; + } - if (ret != 0) { - layout->list[i].err = 0; - gf_msg_trace (this->name, 0, - "Missing disk layout on %s. err = %d", - subvol->name, err); - ret = 0; - goto out; - } + if (xattr) { + /* during lookup and not mkdir */ + ret = dict_get_ptr_and_len(xattr, conf->xattr_name, &disk_layout_raw, + &disk_layout_len); + } - ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw, - disk_layout_len); - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_MERGE_FAILED, - "layout merge from subvolume %s failed", - subvol->name); - goto out; - } + if (ret != 0) { + layout->list[i].err = 0; + gf_msg_trace(this->name, 0, "Missing disk layout on %s. 
err = %d", + subvol->name, err); + ret = 0; + goto out; + } - if (layout->commit_hash == 0) { - layout->commit_hash = layout->list[i].commit_hash; - } else if (layout->commit_hash != layout->list[i].commit_hash) { - layout->commit_hash = DHT_LAYOUT_HASH_INVALID; - } + ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw, + disk_layout_len); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED, + "layout merge from subvolume %s failed", subvol->name); + goto out; + } - layout->list[i].err = 0; + if (layout->commit_hash == 0) { + layout->commit_hash = layout->list[i].commit_hash; + } else if (layout->commit_hash != layout->list[i].commit_hash) { + layout->commit_hash = DHT_LAYOUT_HASH_INVALID; + } + + layout->list[i].err = 0; out: - return ret; + return ret; } - void -dht_layout_entry_swap (dht_layout_t *layout, int i, int j) +dht_layout_entry_swap(dht_layout_t *layout, int i, int j) { - uint32_t start_swap = 0; - uint32_t stop_swap = 0; - uint32_t commit_hash_swap = 0; - xlator_t *xlator_swap = 0; - int err_swap = 0; - - start_swap = layout->list[i].start; - stop_swap = layout->list[i].stop; - xlator_swap = layout->list[i].xlator; - err_swap = layout->list[i].err; - commit_hash_swap = layout->list[i].commit_hash; - - layout->list[i].start = layout->list[j].start; - layout->list[i].stop = layout->list[j].stop; - layout->list[i].xlator = layout->list[j].xlator; - layout->list[i].err = layout->list[j].err; - layout->list[i].commit_hash = layout->list[j].commit_hash; - - layout->list[j].start = start_swap; - layout->list[j].stop = stop_swap; - layout->list[j].xlator = xlator_swap; - layout->list[j].err = err_swap; - layout->list[j].commit_hash = commit_hash_swap; + uint32_t start_swap = 0; + uint32_t stop_swap = 0; + uint32_t commit_hash_swap = 0; + xlator_t *xlator_swap = 0; + int err_swap = 0; + + start_swap = layout->list[i].start; + stop_swap = layout->list[i].stop; + xlator_swap = layout->list[i].xlator; + err_swap = layout->list[i].err; + commit_hash_swap = layout->list[i].commit_hash; + + layout->list[i].start = layout->list[j].start; + layout->list[i].stop = layout->list[j].stop; + layout->list[i].xlator = layout->list[j].xlator; + layout->list[i].err = layout->list[j].err; + layout->list[i].commit_hash = layout->list[j].commit_hash; + + layout->list[j].start = start_swap; + layout->list[j].stop = stop_swap; + layout->list[j].xlator = xlator_swap; + layout->list[j].err = err_swap; + layout->list[j].commit_hash = commit_hash_swap; } void -dht_layout_range_swap (dht_layout_t *layout, int i, int j) +dht_layout_range_swap(dht_layout_t *layout, int i, int j) { - uint32_t start_swap = 0; - uint32_t stop_swap = 0; + uint32_t start_swap = 0; + uint32_t stop_swap = 0; - start_swap = layout->list[i].start; - stop_swap = layout->list[i].stop; + start_swap = layout->list[i].start; + stop_swap = layout->list[i].stop; - layout->list[i].start = layout->list[j].start; - layout->list[i].stop = layout->list[j].stop; + layout->list[i].start = layout->list[j].start; + layout->list[i].stop = layout->list[j].stop; - layout->list[j].start = start_swap; - layout->list[j].stop = stop_swap; + layout->list[j].start = start_swap; + layout->list[j].stop = stop_swap; } int64_t -dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j) +dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j) { - return (strcmp (layout->list[i].xlator->name, - layout->list[j].xlator->name)); + return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name)); } 
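For reference, the on-disk layout value that dht_disk_layout_extract() writes and dht_disk_layout_merge() parses above is four 32-bit integers in network byte order: commit hash, layout type, start of the hash range, and stop of the hash range. A minimal standalone sketch of the decoding side, using the standard ntohl() in place of GlusterFS's ntoh32() macro and a hypothetical struct name purely for illustration:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Hypothetical holder for one decoded entry; the real code stores these
 * fields directly into dht_layout_t->list[pos]. */
struct disk_layout_entry {
    uint32_t commit_hash;
    uint32_t type;  /* DHT_HASH_TYPE_DM or DHT_HASH_TYPE_DM_USER */
    uint32_t start; /* first hash value owned by this subvolume */
    uint32_t stop;  /* last hash value owned by this subvolume */
};

/* Decode the 16-byte on-disk value (four big-endian uint32s) into host
 * order.  Returns 0 on success, -1 if the buffer is missing or short. */
static int
decode_disk_layout(const void *raw, size_t len, struct disk_layout_entry *out)
{
    uint32_t fields[4];

    if (!raw || len < sizeof(fields))
        return -1;

    memcpy(fields, raw, sizeof(fields));
    out->commit_hash = ntohl(fields[0]);
    out->type = ntohl(fields[1]);
    out->start = ntohl(fields[2]);
    out->stop = ntohl(fields[3]);
    return 0;
}

In the xlator itself these fields land in layout->list[pos], and a mismatch between them and the in-memory start/stop/commit_hash is exactly what dht_layout_dir_mismatch() reports further down.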
gf_boolean_t -dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator) +dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator) { - int i = 0; - - for (i = 0; i < layout->cnt; i++) { - /* Check if xlator is already part of layout, and layout is - * non-zero. */ - if (!strcmp (layout->list[i].xlator->name, xlator->name)) { - if (layout->list[i].start != layout->list[i].stop) - return _gf_true; - break; - } - } - return _gf_false; + int i = 0; + + for (i = 0; i < layout->cnt; i++) { + /* Check if xlator is already part of layout, and layout is + * non-zero. */ + if (!strcmp(layout->list[i].xlator->name, xlator->name)) { + if (layout->list[i].start != layout->list[i].stop) + return _gf_true; + break; + } + } + return _gf_false; } int64_t -dht_layout_entry_cmp (dht_layout_t *layout, int i, int j) +dht_layout_entry_cmp(dht_layout_t *layout, int i, int j) { - int64_t diff = 0; + int64_t diff = 0; - /* swap zero'ed out layouts to front, if needed */ - if (!layout->list[j].start && !layout->list[j].stop) { - diff = (int64_t) layout->list[i].stop - - (int64_t) layout->list[j].stop; - goto out; - } - diff = (int64_t) layout->list[i].start - - (int64_t) layout->list[j].start; + /* swap zero'ed out layouts to front, if needed */ + if (!layout->list[j].start && !layout->list[j].stop) { + diff = (int64_t)layout->list[i].stop - (int64_t)layout->list[j].stop; + goto out; + } + diff = (int64_t)layout->list[i].start - (int64_t)layout->list[j].start; out: - return diff; + return diff; } - int -dht_layout_sort (dht_layout_t *layout) +dht_layout_sort(dht_layout_t *layout) { - int i = 0; - int j = 0; - int64_t ret = 0; + int i = 0; + int j = 0; + int64_t ret = 0; - /* TODO: O(n^2) -- bad bad */ + /* TODO: O(n^2) -- bad bad */ - for (i = 0; i < layout->cnt - 1; i++) { - for (j = i + 1; j < layout->cnt; j++) { - ret = dht_layout_entry_cmp (layout, i, j); - if (ret > 0) - dht_layout_entry_swap (layout, i, j); - } + for (i = 0; i < layout->cnt - 1; i++) { + for (j = i + 1; j < layout->cnt; j++) { + ret = dht_layout_entry_cmp(layout, i, j); + if (ret > 0) + dht_layout_entry_swap(layout, i, j); } + } - return 0; + return 0; } int -dht_layout_sort_volname (dht_layout_t *layout) +dht_layout_sort_volname(dht_layout_t *layout) { - int i = 0; - int j = 0; - int64_t ret = 0; + int i = 0; + int j = 0; + int64_t ret = 0; - /* TODO: O(n^2) -- bad bad */ + /* TODO: O(n^2) -- bad bad */ - for (i = 0; i < layout->cnt - 1; i++) { - for (j = i + 1; j < layout->cnt; j++) { - ret = dht_layout_entry_cmp_volname (layout, i, j); - if (ret > 0) - dht_layout_entry_swap (layout, i, j); - } + for (i = 0; i < layout->cnt - 1; i++) { + for (j = i + 1; j < layout->cnt; j++) { + ret = dht_layout_entry_cmp_volname(layout, i, j); + if (ret > 0) + dht_layout_entry_swap(layout, i, j); } + } - return 0; + return 0; } - int -dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, - uint32_t *holes_p, uint32_t *overlaps_p, - uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p, - uint32_t *no_space_p) +dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p, + uint32_t *no_space_p) { - uint32_t overlaps = 0; - uint32_t missing = 0; - uint32_t down = 0; - uint32_t misc = 0; - uint32_t hole_cnt = 0; - uint32_t overlap_cnt = 0; - int i = 0; - int ret = 0; - uint32_t prev_stop = 0; - uint32_t last_stop = 0; - char is_virgin = 1; - uint32_t no_space = 0; - - /* This function scans through the layout spread 
of a directory to - check if there are any anomalies. Prior to calling this function - the layout entries should be sorted in the ascending order. - - If the layout entry has err != 0 - then increment the corresponding anomaly. - else - if (start of the current layout entry > stop + 1 of previous - non erroneous layout entry) - then it indicates a hole in the layout - if (start of the current layout entry < stop + 1 of previous - non erroneous layout entry) - then it indicates an overlap in the layout - */ - last_stop = layout->list[0].start - 1; - prev_stop = last_stop; - - for (i = 0; i < layout->cnt; i++) { - switch (layout->list[i].err) { - case -1: - case ENOENT: - case ESTALE: - missing++; - continue; - case ENOTCONN: - down++; - continue; - case ENOSPC: - no_space++; - continue; - case 0: - /* if err == 0 and start == stop, then it is a non misc++; - * participating subvolume(spread-cnt). Then, do not - * check for anomalies. If start != stop, then treat it - * as misc err */ - if (layout->list[i].start == layout->list[i].stop) { - continue; - } - break; - default: - misc++; - continue; - } - - is_virgin = 0; - - if ((prev_stop + 1) < layout->list[i].start) { - hole_cnt++; + uint32_t overlaps = 0; + uint32_t missing = 0; + uint32_t down = 0; + uint32_t misc = 0; + uint32_t hole_cnt = 0; + uint32_t overlap_cnt = 0; + int i = 0; + int ret = 0; + uint32_t prev_stop = 0; + uint32_t last_stop = 0; + char is_virgin = 1; + uint32_t no_space = 0; + + /* This function scans through the layout spread of a directory to + check if there are any anomalies. Prior to calling this function + the layout entries should be sorted in the ascending order. + + If the layout entry has err != 0 + then increment the corresponding anomaly. + else + if (start of the current layout entry > stop + 1 of previous + non erroneous layout entry) + then it indicates a hole in the layout + if (start of the current layout entry < stop + 1 of previous + non erroneous layout entry) + then it indicates an overlap in the layout + */ + last_stop = layout->list[0].start - 1; + prev_stop = last_stop; + + for (i = 0; i < layout->cnt; i++) { + switch (layout->list[i].err) { + case -1: + case ENOENT: + case ESTALE: + missing++; + continue; + case ENOTCONN: + down++; + continue; + case ENOSPC: + no_space++; + continue; + case 0: + /* if err == 0 and start == stop, then it is a non misc++; + * participating subvolume(spread-cnt). Then, do not + * check for anomalies. 
If start != stop, then treat it + * as misc err */ + if (layout->list[i].start == layout->list[i].stop) { + continue; } + break; + default: + misc++; + continue; + } - if ((prev_stop + 1) > layout->list[i].start) { - overlap_cnt++; - overlaps += ((prev_stop + 1) - layout->list[i].start); - } - prev_stop = layout->list[i].stop; + is_virgin = 0; + + if ((prev_stop + 1) < layout->list[i].start) { + hole_cnt++; } - if ((last_stop - prev_stop) || is_virgin) - hole_cnt++; + if ((prev_stop + 1) > layout->list[i].start) { + overlap_cnt++; + overlaps += ((prev_stop + 1) - layout->list[i].start); + } + prev_stop = layout->list[i].stop; + } - if (holes_p) - *holes_p = hole_cnt; + if ((last_stop - prev_stop) || is_virgin) + hole_cnt++; - if (overlaps_p) - *overlaps_p = overlap_cnt; + if (holes_p) + *holes_p = hole_cnt; - if (missing_p) - *missing_p = missing; + if (overlaps_p) + *overlaps_p = overlap_cnt; - if (down_p) - *down_p = down; + if (missing_p) + *missing_p = missing; - if (misc_p) - *misc_p = misc; + if (down_p) + *down_p = down; - if (no_space_p) - *no_space_p = no_space; + if (misc_p) + *misc_p = misc; - return ret; -} + if (no_space_p) + *no_space_p = no_space; + return ret; +} int -dht_layout_missing_dirs (dht_layout_t *layout) +dht_layout_missing_dirs(dht_layout_t *layout) { - int i = 0, missing = 0; + int i = 0, missing = 0; - if (layout == NULL) - goto out; + if (layout == NULL) + goto out; - for (i = 0; i < layout->cnt; i++) { - if ((layout->list[i].err == ENOENT) - || ((layout->list[i].err == -1) - && (layout->list[i].start == 0) - && (layout->list[i].stop == 0))) { - missing++; - } + for (i = 0; i < layout->cnt; i++) { + if ((layout->list[i].err == ENOENT) || + ((layout->list[i].err == -1) && (layout->list[i].start == 0) && + (layout->list[i].stop == 0))) { + missing++; } + } out: - return missing; + return missing; } - int -dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) +dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout) { - int ret = 0; - uint32_t holes = 0; - uint32_t overlaps = 0; - uint32_t missing = 0; - uint32_t down = 0; - uint32_t misc = 0, missing_dirs = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - ret = dht_layout_sort (layout); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SORT_FAILED, - "sort failed?! how the ...."); - goto out; - } - - gf_uuid_unparse(loc->gfid, gfid); - - ret = dht_layout_anomalies (this, loc, layout, - &holes, &overlaps, - &missing, &down, &misc, NULL); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_FIND_LAYOUT_ANOMALIES_ERROR, - "Error finding anomalies in %s, gfid = %s", - loc->path, gfid); - goto out; - } - - if (holes || overlaps) { - if (missing == layout->cnt) { - gf_msg_debug (this->name, 0, - "Directory %s looked up first time" - " gfid = %s", loc->path, gfid); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_ANOMALIES_INFO, - "Found anomalies in %s (gfid = %s). 
" - "Holes=%d overlaps=%d", - loc->path, gfid, holes, overlaps ); - } - ret = -1; - } - - if (ret >= 0) { - missing_dirs = dht_layout_missing_dirs (layout); - /* TODO During DHT selfheal rewrite (almost) find a better place - * to detect this - probably in dht_layout_anomalies() - */ - if (missing_dirs > 0) - ret += missing_dirs; - } + int ret = 0; + uint32_t holes = 0; + uint32_t overlaps = 0; + uint32_t missing = 0; + uint32_t down = 0; + uint32_t misc = 0, missing_dirs = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + ret = dht_layout_sort(layout); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED, + "sort failed?! how the ...."); + goto out; + } + + gf_uuid_unparse(loc->gfid, gfid); + + ret = dht_layout_anomalies(this, loc, layout, &holes, &overlaps, &missing, + &down, &misc, NULL); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_FIND_LAYOUT_ANOMALIES_ERROR, + "Error finding anomalies in %s, gfid = %s", loc->path, gfid); + goto out; + } + + if (holes || overlaps) { + if (missing == layout->cnt) { + gf_msg_debug(this->name, 0, + "Directory %s looked up first time" + " gfid = %s", + loc->path, gfid); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO, + "Found anomalies in %s (gfid = %s). " + "Holes=%d overlaps=%d", + loc->path, gfid, holes, overlaps); + } + ret = -1; + } + + if (ret >= 0) { + missing_dirs = dht_layout_missing_dirs(layout); + /* TODO During DHT selfheal rewrite (almost) find a better place + * to detect this - probably in dht_layout_anomalies() + */ + if (missing_dirs > 0) + ret += missing_dirs; + } out: - return ret; + return ret; } int -dht_dir_has_layout (dict_t *xattr, char *name) +dht_dir_has_layout(dict_t *xattr, char *name) { + void *disk_layout_raw = NULL; - void *disk_layout_raw = NULL; - - return dict_get_ptr (xattr, name, &disk_layout_raw); + return dict_get_ptr(xattr, name, &disk_layout_raw); } int -dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, - loc_t *loc, dict_t *xattr) +dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + loc_t *loc, dict_t *xattr) { - int idx = 0; - int pos = -1; - int ret = 0; - int err = 0; - int dict_ret = 0; - int32_t disk_layout[4]; - void *disk_layout_raw = NULL; - uint32_t start_off = -1; - uint32_t stop_off = -1; - uint32_t commit_hash = -1; - dht_conf_t *conf = this->private; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - if(loc && loc->inode) - gf_uuid_unparse(loc->inode->gfid, gfid); - - for (idx = 0; idx < layout->cnt; idx++) { - if (layout->list[idx].xlator == subvol) { - pos = idx; - break; - } - } - - if (pos == -1) { - if (loc) { - gf_msg_debug (this->name, 0, - "%s - no layout info for subvolume %s", - loc ? 
loc->path : "path not found", - subvol->name); - } - ret = 1; - goto out; - } - - err = layout->list[pos].err; - - if (!xattr) { - if (err == 0) { - if (loc) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_DICT_GET_FAILED, - "%s: xattr dictionary is NULL", - loc->path); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_DICT_GET_FAILED, - "path not found: " - "xattr dictionary is NULL"); - } - ret = -1; - } - goto out; - } - - dict_ret = dict_get_ptr (xattr, conf->xattr_name, - &disk_layout_raw); - - if (dict_ret < 0) { - if (err == 0 && layout->list[pos].stop) { - if (loc) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_DISK_LAYOUT_MISSING, - "%s: Disk layout missing, gfid = %s", - loc->path, gfid); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_DISK_LAYOUT_MISSING, - "path not found: " - "Disk layout missing, gfid = %s", - gfid); - } - ret = -1; - } - goto out; - } - - memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout)); - - start_off = ntoh32 (disk_layout[2]); - stop_off = ntoh32 (disk_layout[3]); - commit_hash = ntoh32 (disk_layout[0]); - - if ((layout->list[pos].start != start_off) - || (layout->list[pos].stop != stop_off) - || (layout->list[pos].commit_hash != commit_hash)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LAYOUT_INFO, - "subvol: %s; inode layout - %"PRIu32" - %"PRIu32 - " - %"PRIu32"; " - "disk layout - %"PRIu32" - %"PRIu32" - %"PRIu32, - layout->list[pos].xlator->name, - layout->list[pos].start, layout->list[pos].stop, - layout->list[pos].commit_hash, - start_off, stop_off, commit_hash); - ret = 1; - } else { - ret = 0; - } + int idx = 0; + int pos = -1; + int ret = 0; + int err = 0; + int dict_ret = 0; + int32_t disk_layout[4]; + void *disk_layout_raw = NULL; + uint32_t start_off = -1; + uint32_t stop_off = -1; + uint32_t commit_hash = -1; + dht_conf_t *conf = this->private; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + if (loc && loc->inode) + gf_uuid_unparse(loc->inode->gfid, gfid); + + for (idx = 0; idx < layout->cnt; idx++) { + if (layout->list[idx].xlator == subvol) { + pos = idx; + break; + } + } + + if (pos == -1) { + if (loc) { + gf_msg_debug(this->name, 0, "%s - no layout info for subvolume %s", + loc ? 
loc->path : "path not found", subvol->name); + } + ret = 1; + goto out; + } + + err = layout->list[pos].err; + + if (!xattr) { + if (err == 0) { + if (loc) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED, + "%s: xattr dictionary is NULL", loc->path); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED, + "path not found: " + "xattr dictionary is NULL"); + } + ret = -1; + } + goto out; + } + + dict_ret = dict_get_ptr(xattr, conf->xattr_name, &disk_layout_raw); + + if (dict_ret < 0) { + if (err == 0 && layout->list[pos].stop) { + if (loc) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING, + "%s: Disk layout missing, gfid = %s", loc->path, gfid); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING, + "path not found: " + "Disk layout missing, gfid = %s", + gfid); + } + ret = -1; + } + goto out; + } + + memcpy(disk_layout, disk_layout_raw, sizeof(disk_layout)); + + start_off = ntoh32(disk_layout[2]); + stop_off = ntoh32(disk_layout[3]); + commit_hash = ntoh32(disk_layout[0]); + + if ((layout->list[pos].start != start_off) || + (layout->list[pos].stop != stop_off) || + (layout->list[pos].commit_hash != commit_hash)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, + "subvol: %s; inode layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32 + "; " + "disk layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32, + layout->list[pos].xlator->name, layout->list[pos].start, + layout->list[pos].stop, layout->list[pos].commit_hash, start_off, + stop_off, commit_hash); + ret = 1; + } else { + ret = 0; + } out: - return ret; + return ret; } - int -dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode) +dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode) { - dht_layout_t *layout = NULL; - int ret = -1; - dht_conf_t *conf = NULL; - - conf = this->private; - if (!conf) - goto out; - - - layout = dht_layout_for_subvol (this, subvol); - if (!layout) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_NO_LAYOUT_INFO, - "no pre-set layout for subvolume %s", - subvol ? subvol->name : ""); - ret = -1; - goto out; - } - - gf_msg_debug (this->name, 0, "file = %s, subvol = %s", - uuid_utoa (inode->gfid), subvol ? subvol->name : ""); - - LOCK (&conf->layout_lock); - { - dht_inode_ctx_layout_set (inode, this, layout); - } - - UNLOCK (&conf->layout_lock); - - ret = 0; + dht_layout_t *layout = NULL; + int ret = -1; + dht_conf_t *conf = NULL; + + conf = this->private; + if (!conf) + goto out; + + layout = dht_layout_for_subvol(this, subvol); + if (!layout) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO, + "no pre-set layout for subvolume %s", + subvol ? subvol->name : ""); + ret = -1; + goto out; + } + + gf_msg_debug(this->name, 0, "file = %s, subvol = %s", + uuid_utoa(inode->gfid), subvol ? 
subvol->name : ""); + + LOCK(&conf->layout_lock); + { + dht_inode_ctx_layout_set(inode, this, layout); + } + + UNLOCK(&conf->layout_lock); + + ret = 0; out: - return ret; + return ret; } int -dht_layout_index_for_subvol (dht_layout_t *layout, xlator_t *subvol) +dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol) { - int i = 0, ret = -1; + int i = 0, ret = -1; - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].xlator == subvol) { - ret = i; - break; - } + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) { + ret = i; + break; } + } - return ret; + return ret; } diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c index a6d994b4157..eb1695f7e05 100644 --- a/xlators/cluster/dht/src/dht-linkfile.c +++ b/xlators/cluster/dht/src/dht-linkfile.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "glusterfs.h" #include "xlator.h" #include "compat.h" @@ -16,343 +15,325 @@ #include "dht-messages.h" int -dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) +dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) { - char is_linkfile = 0; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - xlator_t *prev = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - prev = cookie; - conf = this->private; - - if (op_ret) - goto out; - - gf_uuid_unparse(local->loc.gfid, gfid); - - is_linkfile = check_is_linkfile (inode, stbuf, xattr, - conf->link_xattr_name); - if (!is_linkfile) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_NOT_LINK_FILE_ERROR, - "got non-linkfile %s:%s, gfid = %s", - prev->name, local->loc.path, gfid); + char is_linkfile = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + prev = cookie; + conf = this->private; + + if (op_ret) + goto out; + + gf_uuid_unparse(local->loc.gfid, gfid); + + is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name); + if (!is_linkfile) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR, + "got non-linkfile %s:%s, gfid = %s", prev->name, local->loc.path, + gfid); out: - local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno, - inode, stbuf, postparent, postparent, - xattr); - return 0; + local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode, + stbuf, postparent, postparent, xattr); + return 0; } #define is_equal(a, b) ((a) == (b)) int -dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - dict_t *xattrs = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - - local = frame->local; - - if (!op_ret) - local->linked = _gf_true; - - FRAME_SU_UNDO (frame, dht_local_t); - - if (op_ret && (op_errno == EEXIST)) { - conf = this->private; - subvol = cookie; - if (!subvol) - goto out; - xattrs = 
dict_new (); - if (!xattrs) - goto out; - ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value. key : %s", - conf->link_xattr_name); - goto out; - } - - STACK_WIND_COOKIE (frame, dht_linkfile_lookup_cbk, subvol, - subvol, subvol->fops->lookup, &local->linkfile.loc, - xattrs); - if (xattrs) - dict_unref (xattrs); - return 0; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + dict_t *xattrs = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + + local = frame->local; + + if (!op_ret) + local->linked = _gf_true; + + FRAME_SU_UNDO(frame, dht_local_t); + + if (op_ret && (op_errno == EEXIST)) { + conf = this->private; + subvol = cookie; + if (!subvol) + goto out; + xattrs = dict_new(); + if (!xattrs) + goto out; + ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value. key : %s", + conf->link_xattr_name); + goto out; } -out: - local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno, - inode, stbuf, preparent, postparent, - xdata); + + STACK_WIND_COOKIE(frame, dht_linkfile_lookup_cbk, subvol, subvol, + subvol->fops->lookup, &local->linkfile.loc, xattrs); if (xattrs) - dict_unref (xattrs); + dict_unref(xattrs); return 0; + } +out: + local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode, + stbuf, preparent, postparent, xdata); + if (xattrs) + dict_unref(xattrs); + return 0; } - int -dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, - xlator_t *this, - xlator_t *tovol, xlator_t *fromvol, loc_t *loc) +dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, + xlator_t *this, xlator_t *tovol, xlator_t *fromvol, + loc_t *loc) { - dht_local_t *local = NULL; - dict_t *dict = NULL; - int need_unref = 0; - int ret = 0; - dht_conf_t *conf = this->private; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - local->linkfile.linkfile_cbk = linkfile_cbk; - local->linkfile.srcvol = tovol; - loc_copy (&local->linkfile.loc, loc); - - local->linked = _gf_false; - - dict = local->params; - if (!dict) { - dict = dict_new (); - if (!dict) - goto out; - need_unref = 1; - } - - - if (!gf_uuid_is_null (local->gfid)) { - gf_uuid_unparse(local->gfid, gfid); - - ret = dict_set_gfuuid (dict, "gfid-req", local->gfid, true); - if (ret) - gf_msg ("dht-linkfile", GF_LOG_INFO, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: " - "key = gfid-req, gfid = %s ", loc->path, gfid); - } else { - gf_uuid_unparse(loc->gfid, gfid); - } - - ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); + dht_local_t *local = NULL; + dict_t *dict = NULL; + int need_unref = 0; + int ret = 0; + dht_conf_t *conf = this->private; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + local->linkfile.linkfile_cbk = linkfile_cbk; + local->linkfile.srcvol = tovol; + loc_copy(&local->linkfile.loc, loc); + + local->linked = _gf_false; + + dict = local->params; + if (!dict) { + dict = dict_new(); + if (!dict) + goto out; + need_unref = 1; + } + + if (!gf_uuid_is_null(local->gfid)) { + gf_uuid_unparse(local->gfid, gfid); + + ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true); if (ret) - gf_msg ("dht-linkfile", GF_LOG_INFO, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: key = %s," - " gfid = %s", loc->path, - GLUSTERFS_INTERNAL_FOP_KEY, gfid); - - ret = dict_set_str (dict, 
conf->link_xattr_name, tovol->name); - - if (ret < 0) { - gf_msg (frame->this->name, GF_LOG_INFO, 0, - DHT_MSG_CREATE_LINK_FAILED, - "%s: failed to initialize linkfile data, gfid = %s", - loc->path, gfid); - goto out; - } - - local->link_subvol = fromvol; - /* Always create as root:root. dht_linkfile_attr_heal fixes the - * ownsership */ - FRAME_SU_DO (frame, dht_local_t); - STACK_WIND_COOKIE (frame, dht_linkfile_create_cbk, fromvol, fromvol, - fromvol->fops->mknod, loc, - S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict); - - if (need_unref && dict) - dict_unref (dict); - - return 0; + gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value: " + "key = gfid-req, gfid = %s ", + loc->path, gfid); + } else { + gf_uuid_unparse(loc->gfid, gfid); + } + + ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); + if (ret) + gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value: key = %s," + " gfid = %s", + loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid); + + ret = dict_set_str(dict, conf->link_xattr_name, tovol->name); + + if (ret < 0) { + gf_msg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED, + "%s: failed to initialize linkfile data, gfid = %s", loc->path, + gfid); + goto out; + } + + local->link_subvol = fromvol; + /* Always create as root:root. dht_linkfile_attr_heal fixes the + * ownsership */ + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND_COOKIE(frame, dht_linkfile_create_cbk, fromvol, fromvol, + fromvol->fops->mknod, loc, S_IFREG | DHT_LINKFILE_MODE, 0, + 0, dict); + + if (need_unref && dict) + dict_unref(dict); + + return 0; out: - local->linkfile.linkfile_cbk (frame, frame->this, frame->this, -1, ENOMEM, - loc->inode, NULL, NULL, NULL, NULL); + local->linkfile.linkfile_cbk(frame, frame->this, frame->this, -1, ENOMEM, + loc->inode, NULL, NULL, NULL, NULL); - if (need_unref && dict) - dict_unref (dict); + if (need_unref && dict) + dict_unref(dict); - return 0; + return 0; } - int -dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - subvol = cookie; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + local = frame->local; + subvol = cookie; - if (op_ret == -1) { - - gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg (this->name, GF_LOG_INFO, op_errno, - DHT_MSG_UNLINK_FAILED, - "Unlinking linkfile %s (gfid = %s)on " - "subvolume %s failed ", - local->loc.path, gfid, subvol->name); - } + if (op_ret == -1) { + gf_uuid_unparse(local->loc.gfid, gfid); + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED, + "Unlinking linkfile %s (gfid = %s)on " + "subvolume %s failed ", + local->loc.path, gfid, subvol->name); + } - DHT_STACK_DESTROY (frame); + DHT_STACK_DESTROY(frame); - return 0; + return 0; } - int -dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, - xlator_t *subvol, loc_t *loc) +dht_linkfile_unlink(call_frame_t *frame, xlator_t *this, xlator_t *subvol, + loc_t *loc) { - call_frame_t *unlink_frame = NULL; - dht_local_t *unlink_local = NULL; + call_frame_t *unlink_frame = NULL; + dht_local_t *unlink_local = 
NULL; - unlink_frame = copy_frame (frame); - if (!unlink_frame) { - goto err; - } + unlink_frame = copy_frame(frame); + if (!unlink_frame) { + goto err; + } - /* Using non-fop value here, as anyways, 'local->fop' is not used in - this particular case */ - unlink_local = dht_local_init (unlink_frame, loc, NULL, - GF_FOP_MAXVALUE); - if (!unlink_local) { - goto err; - } + /* Using non-fop value here, as anyways, 'local->fop' is not used in + this particular case */ + unlink_local = dht_local_init(unlink_frame, loc, NULL, GF_FOP_MAXVALUE); + if (!unlink_local) { + goto err; + } - STACK_WIND_COOKIE (unlink_frame, dht_linkfile_unlink_cbk, subvol, - subvol, subvol->fops->unlink, - &unlink_local->loc, 0, NULL); + STACK_WIND_COOKIE(unlink_frame, dht_linkfile_unlink_cbk, subvol, subvol, + subvol->fops->unlink, &unlink_local->loc, 0, NULL); - return 0; + return 0; err: - if (unlink_frame) - DHT_STACK_DESTROY (unlink_frame); + if (unlink_frame) + DHT_STACK_DESTROY(unlink_frame); - return -1; + return -1; } - xlator_t * -dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf, - dict_t *xattr) +dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *stbuf, + dict_t *xattr) { - dht_conf_t *conf = NULL; - xlator_t *subvol = NULL; - void *volname = NULL; - int i = 0, ret = 0; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + void *volname = NULL; + int i = 0, ret = 0; - conf = this->private; + conf = this->private; - if (!xattr) - goto out; + if (!xattr) + goto out; - ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname); + ret = dict_get_ptr(xattr, conf->link_xattr_name, &volname); - if ((-1 == ret) || !volname) - goto out; + if ((-1 == ret) || !volname) + goto out; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) { - subvol = conf->subvolumes[i]; - break; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (strcmp(conf->subvolumes[i]->name, (char *)volname) == 0) { + subvol = conf->subvolumes[i]; + break; } + } out: - return subvol; + return subvol; } int -dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - dht_local_t *local = NULL; - loc_t *loc = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; - local = frame->local; - loc = &local->loc; + local = frame->local; + loc = &local->loc; - if (op_ret) - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_SETATTR_FAILED, - "Failed to set attr uid/gid on %s" - " : ", - (loc->path? loc->path: "NULL"), - uuid_utoa(local->gfid)); + if (op_ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED, + "Failed to set attr uid/gid on %s" + " : ", + (loc->path ? 
loc->path : "NULL"), uuid_utoa(local->gfid)); - DHT_STACK_DESTROY (frame); + DHT_STACK_DESTROY(frame); - return 0; + return 0; } int -dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this) +dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this) { - int ret = -1; - call_frame_t *copy = NULL; - dht_local_t *local = NULL; - dht_local_t *copy_local = NULL; - xlator_t *subvol = NULL; - struct iatt stbuf = {0,}; - dict_t *xattr = NULL; - - local = frame->local; - - GF_VALIDATE_OR_GOTO ("dht", local, out); - GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out); - - if (local->stbuf.ia_type == IA_INVAL) - return 0; + int ret = -1; + call_frame_t *copy = NULL; + dht_local_t *local = NULL; + dht_local_t *copy_local = NULL; + xlator_t *subvol = NULL; + struct iatt stbuf = { + 0, + }; + dict_t *xattr = NULL; + + local = frame->local; + + GF_VALIDATE_OR_GOTO("dht", local, out); + GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out); + + if (local->stbuf.ia_type == IA_INVAL) + return 0; - DHT_MARK_FOP_INTERNAL (xattr); + DHT_MARK_FOP_INTERNAL(xattr); - gf_uuid_copy (local->loc.gfid, local->stbuf.ia_gfid); + gf_uuid_copy(local->loc.gfid, local->stbuf.ia_gfid); - copy = copy_frame (frame); + copy = copy_frame(frame); - if (!copy) - goto out; + if (!copy) + goto out; - copy_local = dht_local_init (copy, &local->loc, NULL, 0); + copy_local = dht_local_init(copy, &local->loc, NULL, 0); - if (!copy_local) - goto out; + if (!copy_local) + goto out; - stbuf = local->stbuf; - subvol = local->link_subvol; + stbuf = local->stbuf; + subvol = local->link_subvol; - copy->local = copy_local; + copy->local = copy_local; - FRAME_SU_DO (copy, dht_local_t); + FRAME_SU_DO(copy, dht_local_t); - STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol, - subvol->fops->setattr, ©_local->loc, - &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), xattr); - ret = 0; + STACK_WIND(copy, dht_linkfile_setattr_cbk, subvol, subvol->fops->setattr, + ©_local->loc, &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), + xattr); + ret = 0; out: - if ((ret < 0) && (copy)) - DHT_STACK_DESTROY (copy); + if ((ret < 0) && (copy)) + DHT_STACK_DESTROY(copy); - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); - return ret; + return ret; } diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c index 3f389eafa75..f9bac4f97c8 100644 --- a/xlators/cluster/dht/src/dht-lock.c +++ b/xlators/cluster/dht/src/dht-lock.c @@ -11,1252 +11,1265 @@ #include "dht-lock.h" static char * -dht_lock_asprintf (dht_lock_t *lock) +dht_lock_asprintf(dht_lock_t *lock) { - char *lk_buf = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0, }; + char *lk_buf = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; - if (lock == NULL) - goto out; + if (lock == NULL) + goto out; - uuid_utoa_r (lock->loc.gfid, gfid); + uuid_utoa_r(lock->loc.gfid, gfid); - gf_asprintf (&lk_buf, "%s:%s", lock->xl->name, gfid); + gf_asprintf(&lk_buf, "%s:%s", lock->xl->name, gfid); out: - return lk_buf; + return lk_buf; } static void -dht_log_lk_array (char *name, gf_loglevel_t log_level, dht_lock_t **lk_array, - int count) +dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array, + int count) { - int i = 0; - char *lk_buf = NULL; + int i = 0; + char *lk_buf = NULL; - if ((lk_array == NULL) || (count == 0)) - goto out; + if ((lk_array == NULL) || (count == 0)) + goto out; - for (i = 0; i < count; i++) { - lk_buf = dht_lock_asprintf (lk_array[i]); - if (!lk_buf) - goto out; + for (i = 0; i < count; i++) { + lk_buf = dht_lock_asprintf(lk_array[i]); + if 
(!lk_buf) + goto out; - gf_msg (name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, - "%d. %s", i, lk_buf); - GF_FREE (lk_buf); - } + gf_msg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "%d. %s", i, lk_buf); + GF_FREE(lk_buf); + } out: - return; + return; } static void -dht_lock_stack_destroy (call_frame_t *lock_frame, dht_lock_type_t lk) +dht_lock_stack_destroy(call_frame_t *lock_frame, dht_lock_type_t lk) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = lock_frame->local; + local = lock_frame->local; - if (lk == DHT_INODELK) { - local->lock[0].layout.my_layout.locks = NULL; - local->lock[0].layout.my_layout.lk_count = 0; - } else { - local->lock[0].ns.directory_ns.locks = NULL; - local->lock[0].ns.directory_ns.lk_count = 0; - } + if (lk == DHT_INODELK) { + local->lock[0].layout.my_layout.locks = NULL; + local->lock[0].layout.my_layout.lk_count = 0; + } else { + local->lock[0].ns.directory_ns.locks = NULL; + local->lock[0].ns.directory_ns.lk_count = 0; + } - DHT_STACK_DESTROY (lock_frame); - return; + DHT_STACK_DESTROY(lock_frame); + return; } static void -dht_lock_free (dht_lock_t *lock) +dht_lock_free(dht_lock_t *lock) { - if (lock == NULL) - goto out; + if (lock == NULL) + goto out; - loc_wipe (&lock->loc); - GF_FREE (lock->domain); - GF_FREE (lock->basename); - mem_put (lock); + loc_wipe(&lock->loc); + GF_FREE(lock->domain); + GF_FREE(lock->basename); + mem_put(lock); out: - return; + return; } static void -dht_set_lkowner (dht_lock_t **lk_array, int count, gf_lkowner_t *lkowner) +dht_set_lkowner(dht_lock_t **lk_array, int count, gf_lkowner_t *lkowner) { - int i = 0; + int i = 0; - if (!lk_array || !lkowner) - goto out; + if (!lk_array || !lkowner) + goto out; - for (i = 0; i < count; i++) { - lk_array[i]->lk_owner = *lkowner; - } + for (i = 0; i < count; i++) { + lk_array[i]->lk_owner = *lkowner; + } out: - return; + return; } static int -dht_lock_request_cmp (const void *val1, const void *val2) +dht_lock_request_cmp(const void *val1, const void *val2) { - dht_lock_t *lock1 = NULL; - dht_lock_t *lock2 = NULL; - int ret = -1; + dht_lock_t *lock1 = NULL; + dht_lock_t *lock2 = NULL; + int ret = -1; - lock1 = *(dht_lock_t **)val1; - lock2 = *(dht_lock_t **)val2; + lock1 = *(dht_lock_t **)val1; + lock2 = *(dht_lock_t **)val2; - GF_VALIDATE_OR_GOTO ("dht-locks", lock1, out); - GF_VALIDATE_OR_GOTO ("dht-locks", lock2, out); + GF_VALIDATE_OR_GOTO("dht-locks", lock1, out); + GF_VALIDATE_OR_GOTO("dht-locks", lock2, out); - ret = strcmp (lock1->xl->name, lock2->xl->name); + ret = strcmp(lock1->xl->name, lock2->xl->name); - if (ret == 0) { - ret = gf_uuid_compare (lock1->loc.gfid, lock2->loc.gfid); - } + if (ret == 0) { + ret = gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid); + } out: - return ret; + return ret; } static int -dht_lock_order_requests (dht_lock_t **locks, int count) +dht_lock_order_requests(dht_lock_t **locks, int count) { - int ret = -1; + int ret = -1; - if (!locks || !count) - goto out; + if (!locks || !count) + goto out; - qsort (locks, count, sizeof (*locks), dht_lock_request_cmp); - ret = 0; + qsort(locks, count, sizeof(*locks), dht_lock_request_cmp); + ret = 0; out: - return ret; + return ret; } void -dht_lock_array_free (dht_lock_t **lk_array, int count) +dht_lock_array_free(dht_lock_t **lk_array, int count) { - int i = 0; - dht_lock_t *lock = NULL; + int i = 0; + dht_lock_t *lock = NULL; - if (lk_array == NULL) - goto out; + if (lk_array == NULL) + goto out; - for (i = 0; i < count; i++) { - lock = lk_array[i]; - lk_array[i] = NULL; - dht_lock_free (lock); - } 
+ for (i = 0; i < count; i++) { + lock = lk_array[i]; + lk_array[i] = NULL; + dht_lock_free(lock); + } out: - return; + return; } int32_t -dht_lock_count (dht_lock_t **lk_array, int lk_count) +dht_lock_count(dht_lock_t **lk_array, int lk_count) { - int i = 0, locked = 0; + int i = 0, locked = 0; - if ((lk_array == NULL) || (lk_count == 0)) - goto out; + if ((lk_array == NULL) || (lk_count == 0)) + goto out; - for (i = 0; i < lk_count; i++) { - if (lk_array[i]->locked) - locked++; - } + for (i = 0; i < lk_count; i++) { + if (lk_array[i]->locked) + locked++; + } out: - return locked; + return locked; } static call_frame_t * -dht_lock_frame (call_frame_t *parent_frame) +dht_lock_frame(call_frame_t *parent_frame) { - call_frame_t *lock_frame = NULL; + call_frame_t *lock_frame = NULL; - lock_frame = copy_frame (parent_frame); - if (lock_frame == NULL) - goto out; + lock_frame = copy_frame(parent_frame); + if (lock_frame == NULL) + goto out; - set_lk_owner_from_ptr (&lock_frame->root->lk_owner, parent_frame->root); + set_lk_owner_from_ptr(&lock_frame->root->lk_owner, parent_frame->root); out: - return lock_frame; + return lock_frame; } dht_lock_t * -dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type, - const char *domain, const char *basename, - dht_reaction_type_t do_on_failure) +dht_lock_new(xlator_t *this, xlator_t *xl, loc_t *loc, short type, + const char *domain, const char *basename, + dht_reaction_type_t do_on_failure) { - dht_conf_t *conf = NULL; - dht_lock_t *lock = NULL; - - conf = this->private; - - lock = mem_get0 (conf->lock_pool); - if (lock == NULL) - goto out; - - lock->xl = xl; - lock->type = type; - lock->do_on_failure = do_on_failure; - - lock->domain = gf_strdup (domain); - if (lock->domain == NULL) { - dht_lock_free (lock); - lock = NULL; - goto out; + dht_conf_t *conf = NULL; + dht_lock_t *lock = NULL; + + conf = this->private; + + lock = mem_get0(conf->lock_pool); + if (lock == NULL) + goto out; + + lock->xl = xl; + lock->type = type; + lock->do_on_failure = do_on_failure; + + lock->domain = gf_strdup(domain); + if (lock->domain == NULL) { + dht_lock_free(lock); + lock = NULL; + goto out; + } + + if (basename) { + lock->basename = gf_strdup(basename); + if (lock->basename == NULL) { + dht_lock_free(lock); + lock = NULL; + goto out; } - - if (basename) { - lock->basename = gf_strdup (basename); - if (lock->basename == NULL) { - dht_lock_free (lock); - lock = NULL; - goto out; - } - } - - /* Fill only inode and gfid. - posix and protocol/server give preference to pargfid/basename over - gfid/inode for resolution if all the three parameters of loc_t are - present. I want to avoid the following hypothetical situation: - - 1. rebalance did a lookup on a dentry and got a gfid. - 2. rebalance acquires lock on loc_t which was filled with gfid and - path (pargfid/bname) from step 1. - 3. somebody deleted and recreated the same file - 4. rename on the same path acquires lock on loc_t which now points - to a different inode (and hence gets the lock). - 5. rebalance continues to migrate file (note that not all fops done - by rebalance during migration are inode/gfid based Eg., unlink) - 6. rename continues. - */ - lock->loc.inode = inode_ref (loc->inode); - loc_gfid (loc, lock->loc.gfid); + } + + /* Fill only inode and gfid. + posix and protocol/server give preference to pargfid/basename over + gfid/inode for resolution if all the three parameters of loc_t are + present. I want to avoid the following hypothetical situation: + + 1. 
rebalance did a lookup on a dentry and got a gfid. + 2. rebalance acquires lock on loc_t which was filled with gfid and + path (pargfid/bname) from step 1. + 3. somebody deleted and recreated the same file + 4. rename on the same path acquires lock on loc_t which now points + to a different inode (and hence gets the lock). + 5. rebalance continues to migrate file (note that not all fops done + by rebalance during migration are inode/gfid based Eg., unlink) + 6. rename continues. + */ + lock->loc.inode = inode_ref(loc->inode); + loc_gfid(loc, lock->loc.gfid); out: - return lock; + return lock; } static int -dht_local_entrylk_init (call_frame_t *frame, dht_lock_t **lk_array, - int lk_count, fop_entrylk_cbk_t entrylk_cbk) +dht_local_entrylk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, + fop_entrylk_cbk_t entrylk_cbk) { - int ret = -1; - dht_local_t *local = NULL; + int ret = -1; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local == NULL) { - local = dht_local_init (frame, NULL, NULL, 0); - } + if (local == NULL) { + local = dht_local_init(frame, NULL, NULL, 0); + } - if (local == NULL) { - goto out; - } + if (local == NULL) { + goto out; + } - local->lock[0].ns.directory_ns.entrylk_cbk = entrylk_cbk; - local->lock[0].ns.directory_ns.locks = lk_array; - local->lock[0].ns.directory_ns.lk_count = lk_count; + local->lock[0].ns.directory_ns.entrylk_cbk = entrylk_cbk; + local->lock[0].ns.directory_ns.locks = lk_array; + local->lock[0].ns.directory_ns.lk_count = lk_count; - ret = dht_lock_order_requests (local->lock[0].ns.directory_ns.locks, - local->lock[0].ns.directory_ns.lk_count); - if (ret < 0) - goto out; + ret = dht_lock_order_requests(local->lock[0].ns.directory_ns.locks, + local->lock[0].ns.directory_ns.lk_count); + if (ret < 0) + goto out; - ret = 0; + ret = 0; out: - return ret; + return ret; } static void -dht_entrylk_done (call_frame_t *lock_frame) +dht_entrylk_done(call_frame_t *lock_frame) { - fop_entrylk_cbk_t entrylk_cbk = NULL; - call_frame_t *main_frame = NULL; - dht_local_t *local = NULL; + fop_entrylk_cbk_t entrylk_cbk = NULL; + call_frame_t *main_frame = NULL; + dht_local_t *local = NULL; - local = lock_frame->local; - main_frame = local->main_frame; + local = lock_frame->local; + main_frame = local->main_frame; - local->lock[0].ns.directory_ns.locks = NULL; - local->lock[0].ns.directory_ns.lk_count = 0; + local->lock[0].ns.directory_ns.locks = NULL; + local->lock[0].ns.directory_ns.lk_count = 0; - entrylk_cbk = local->lock[0].ns.directory_ns.entrylk_cbk; - local->lock[0].ns.directory_ns.entrylk_cbk = NULL; + entrylk_cbk = local->lock[0].ns.directory_ns.entrylk_cbk; + local->lock[0].ns.directory_ns.entrylk_cbk = NULL; - entrylk_cbk (main_frame, NULL, main_frame->this, - local->lock[0].ns.directory_ns.op_ret, - local->lock[0].ns.directory_ns.op_errno, NULL); + entrylk_cbk(main_frame, NULL, main_frame->this, + local->lock[0].ns.directory_ns.op_ret, + local->lock[0].ns.directory_ns.op_errno, NULL); - dht_lock_stack_destroy (lock_frame, DHT_ENTRYLK); - return; + dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK); + return; } static int32_t -dht_unlock_entrylk_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - gf_uuid_unparse 
(local->lock[0].ns.directory_ns.locks[0]->loc.inode->gfid, gfid); - - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "unlock failed on gfid: %s, stale lock might be left " - "in DHT_LAYOUT_HEAL_DOMAIN", gfid); - } - - DHT_STACK_DESTROY (frame); - return 0; + dht_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + gf_uuid_unparse(local->lock[0].ns.directory_ns.locks[0]->loc.inode->gfid, + gfid); + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "unlock failed on gfid: %s, stale lock might be left " + "in DHT_LAYOUT_HEAL_DOMAIN", + gfid); + } + + DHT_STACK_DESTROY(frame); + return 0; } static int32_t -dht_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int lk_index = 0, call_cnt = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; + dht_local_t *local = NULL; + int lk_index = 0, call_cnt = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; - lk_index = (long) cookie; + lk_index = (long)cookie; - local = frame->local; + local = frame->local; - uuid_utoa_r (local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid); + uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid); - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_UNLOCKING_FAILED, - "unlocking failed on %s:%s", - local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, - gfid); - } else { - local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0; - } + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED, + "unlocking failed on %s:%s", + local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, gfid); + } else { + local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0; + } - call_cnt = dht_frame_return (frame); - if (is_last_call (call_cnt)) { - dht_entrylk_done (frame); - } + call_cnt = dht_frame_return(frame); + if (is_last_call(call_cnt)) { + dht_entrylk_done(frame); + } - return 0; + return 0; } static int32_t -dht_unlock_entrylk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count, - fop_entrylk_cbk_t entrylk_cbk) +dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, + fop_entrylk_cbk_t entrylk_cbk) { - dht_local_t *local = NULL; - int ret = -1 , i = 0; - call_frame_t *lock_frame = NULL; - int call_cnt = 0; - - GF_VALIDATE_OR_GOTO ("dht-locks", frame, done); - GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, done); - GF_VALIDATE_OR_GOTO (frame->this->name, entrylk_cbk, done); - - call_cnt = dht_lock_count (lk_array, lk_count); - if (call_cnt == 0) { - ret = 0; - goto done; - } - - lock_frame = dht_lock_frame (frame); - if (lock_frame == NULL) { - gf_msg (frame->this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "cannot allocate a frame, not unlocking following " - "entrylks:"); - - dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array, - lk_count); - goto done; - } - - ret = dht_local_entrylk_init (lock_frame, lk_array, lk_count, - entrylk_cbk); - if (ret < 0) { - gf_msg (frame->this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "storing locks in local failed, not unlocking " - "following entrylks:"); - - dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array, - lk_count); - - goto done; - } + dht_local_t *local = NULL; 
+ int ret = -1, i = 0; + call_frame_t *lock_frame = NULL; + int call_cnt = 0; - local = lock_frame->local; - local->main_frame = frame; - local->call_cnt = call_cnt; - - for (i = 0; i < local->lock[0].ns.directory_ns.lk_count; i++) { - if (!local->lock[0].ns.directory_ns.locks[i]->locked) - continue; - - lock_frame->root->lk_owner = local->lock[0].ns.directory_ns.locks[i]->lk_owner; - STACK_WIND_COOKIE (lock_frame, dht_unlock_entrylk_cbk, - (void *)(long)i, - local->lock[0].ns.directory_ns.locks[i]->xl, - local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk, - local->lock[0].ns.directory_ns.locks[i]->domain, - &local->lock[0].ns.directory_ns.locks[i]->loc, - local->lock[0].ns.directory_ns.locks[i]->basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL); - if (!--call_cnt) - break; - } + GF_VALIDATE_OR_GOTO("dht-locks", frame, done); + GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done); + GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, done); - return 0; + call_cnt = dht_lock_count(lk_array, lk_count); + if (call_cnt == 0) { + ret = 0; + goto done; + } + + lock_frame = dht_lock_frame(frame); + if (lock_frame == NULL) { + gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "cannot allocate a frame, not unlocking following " + "entrylks:"); + + dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); + goto done; + } + + ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk); + if (ret < 0) { + gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "storing locks in local failed, not unlocking " + "following entrylks:"); + + dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); + + goto done; + } + + local = lock_frame->local; + local->main_frame = frame; + local->call_cnt = call_cnt; + + for (i = 0; i < local->lock[0].ns.directory_ns.lk_count; i++) { + if (!local->lock[0].ns.directory_ns.locks[i]->locked) + continue; + + lock_frame->root + ->lk_owner = local->lock[0].ns.directory_ns.locks[i]->lk_owner; + STACK_WIND_COOKIE( + lock_frame, dht_unlock_entrylk_cbk, (void *)(long)i, + local->lock[0].ns.directory_ns.locks[i]->xl, + local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk, + local->lock[0].ns.directory_ns.locks[i]->domain, + &local->lock[0].ns.directory_ns.locks[i]->loc, + local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_UNLOCK, + ENTRYLK_WRLCK, NULL); + if (!--call_cnt) + break; + } + + return 0; done: - if (lock_frame) - dht_lock_stack_destroy (lock_frame, DHT_ENTRYLK); + if (lock_frame) + dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK); - /* no locks acquired, invoke entrylk_cbk */ - if (ret == 0) - entrylk_cbk (frame, NULL, frame->this, 0, 0, NULL); + /* no locks acquired, invoke entrylk_cbk */ + if (ret == 0) + entrylk_cbk(frame, NULL, frame->this, 0, 0, NULL); - return ret; + return ret; } int32_t -dht_unlock_entrylk_wrapper (call_frame_t *frame, dht_elock_wrap_t *entrylk) +dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk) { - dht_local_t *local = NULL, *lock_local = NULL; - call_frame_t *lock_frame = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - int ret = 0; - - local = frame->local; - - if (!entrylk || !entrylk->locks) - goto out; - - gf_uuid_unparse (local->loc.parent->gfid, pgfid); - - lock_frame = copy_frame (frame); - if (lock_frame == NULL) { - gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "copy frame failed", pgfid, local->loc.name, - local->loc.path); - goto 
done; - } - - lock_local = dht_local_init (lock_frame, NULL, NULL, 0); - if (lock_local == NULL) { - gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "local creation failed", pgfid, local->loc.name, - local->loc.path); - goto done; - } - - lock_frame->local = lock_local; - - lock_local->lock[0].ns.directory_ns.locks = entrylk->locks; - lock_local->lock[0].ns.directory_ns.lk_count = entrylk->lk_count; - entrylk->locks = NULL; - entrylk->lk_count = 0; - - ret = dht_unlock_entrylk (lock_frame, - lock_local->lock[0].ns.directory_ns.locks, - lock_local->lock[0].ns.directory_ns.lk_count, - dht_unlock_entrylk_done); - if (ret) - goto done; - - lock_frame = NULL; + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + + local = frame->local; + + if (!entrylk || !entrylk->locks) + goto out; + + gf_uuid_unparse(local->loc.parent->gfid, pgfid); + + lock_frame = copy_frame(frame); + if (lock_frame == NULL) { + gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "copy frame failed", + pgfid, local->loc.name, local->loc.path); + goto done; + } + + lock_local = dht_local_init(lock_frame, NULL, NULL, 0); + if (lock_local == NULL) { + gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "local creation failed", + pgfid, local->loc.name, local->loc.path); + goto done; + } + + lock_frame->local = lock_local; + + lock_local->lock[0].ns.directory_ns.locks = entrylk->locks; + lock_local->lock[0].ns.directory_ns.lk_count = entrylk->lk_count; + entrylk->locks = NULL; + entrylk->lk_count = 0; + + ret = dht_unlock_entrylk( + lock_frame, lock_local->lock[0].ns.directory_ns.locks, + lock_local->lock[0].ns.directory_ns.lk_count, dht_unlock_entrylk_done); + if (ret) + goto done; + + lock_frame = NULL; done: - if (lock_frame != NULL) { - DHT_STACK_DESTROY (lock_frame); - } + if (lock_frame != NULL) { + DHT_STACK_DESTROY(lock_frame); + } out: - return 0; + return 0; } static int -dht_entrylk_cleanup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) +dht_entrylk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_entrylk_done (frame); - return 0; + dht_entrylk_done(frame); + return 0; } static void -dht_entrylk_cleanup (call_frame_t *lock_frame) +dht_entrylk_cleanup(call_frame_t *lock_frame) { - dht_lock_t **lk_array = NULL; - int lk_count = 0, lk_acquired = 0; - dht_local_t *local = NULL; + dht_lock_t **lk_array = NULL; + int lk_count = 0, lk_acquired = 0; + dht_local_t *local = NULL; - local = lock_frame->local; + local = lock_frame->local; - lk_array = local->lock[0].ns.directory_ns.locks; - lk_count = local->lock[0].ns.directory_ns.lk_count; + lk_array = local->lock[0].ns.directory_ns.locks; + lk_count = local->lock[0].ns.directory_ns.lk_count; - lk_acquired = dht_lock_count (lk_array, lk_count); - if (lk_acquired != 0) { - dht_unlock_entrylk (lock_frame, lk_array, lk_count, - dht_entrylk_cleanup_cbk); - } else { - dht_entrylk_done (lock_frame); - } + lk_acquired = dht_lock_count(lk_array, lk_count); + if (lk_acquired != 0) { + dht_unlock_entrylk(lock_frame, lk_array, lk_count, + dht_entrylk_cleanup_cbk); + } else { + dht_entrylk_done(lock_frame); + } - return; + return; } - static int32_t -dht_blocking_entrylk_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_blocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int lk_index = 0; - int i = 0; - dht_local_t *local = NULL; - - lk_index = (long) cookie; - - local = frame->local; - if (op_ret == 0) { - local->lock[0].ns.directory_ns.locks[lk_index]->locked = _gf_true; - } else { - switch (op_errno) { - case ESTALE: - case ENOENT: - if (local->lock[0].ns.directory_ns.locks[lk_index]->do_on_failure != IGNORE_ENOENT_ESTALE) { - local->lock[0].ns.directory_ns.op_ret = -1; - local->lock[0].ns.directory_ns.op_errno = op_errno; - goto cleanup; - } - break; - default: - local->lock[0].ns.directory_ns.op_ret = -1; - local->lock[0].ns.directory_ns.op_errno = op_errno; - goto cleanup; + int lk_index = 0; + int i = 0; + dht_local_t *local = NULL; + + lk_index = (long)cookie; + + local = frame->local; + if (op_ret == 0) { + local->lock[0].ns.directory_ns.locks[lk_index]->locked = _gf_true; + } else { + switch (op_errno) { + case ESTALE: + case ENOENT: + if (local->lock[0] + .ns.directory_ns.locks[lk_index] + ->do_on_failure != IGNORE_ENOENT_ESTALE) { + local->lock[0].ns.directory_ns.op_ret = -1; + local->lock[0].ns.directory_ns.op_errno = op_errno; + goto cleanup; } + break; + default: + local->lock[0].ns.directory_ns.op_ret = -1; + local->lock[0].ns.directory_ns.op_errno = op_errno; + goto cleanup; } + } - if (lk_index == (local->lock[0].ns.directory_ns.lk_count - 1)) { - for (i = 0; (i < local->lock[0].ns.directory_ns.lk_count) && - (!local->lock[0].ns.directory_ns.locks[i]->locked); i++) - ; + if (lk_index == (local->lock[0].ns.directory_ns.lk_count - 1)) { + for (i = 0; (i < local->lock[0].ns.directory_ns.lk_count) && + (!local->lock[0].ns.directory_ns.locks[i]->locked); + i++) + ; - if (i == local->lock[0].ns.directory_ns.lk_count) { - local->lock[0].ns.directory_ns.op_ret = -1; - local->lock[0].ns.directory_ns.op_errno = op_errno; - } - - dht_entrylk_done (frame); - } else { - dht_blocking_entrylk_rec (frame, ++lk_index); + if (i == local->lock[0].ns.directory_ns.lk_count) { + local->lock[0].ns.directory_ns.op_ret = -1; + local->lock[0].ns.directory_ns.op_errno = op_errno; } - return 0; + dht_entrylk_done(frame); + } else { + dht_blocking_entrylk_rec(frame, ++lk_index); + } + + return 0; cleanup: - dht_entrylk_cleanup (frame); + dht_entrylk_cleanup(frame); - return 0; + return 0; } void -dht_blocking_entrylk_rec (call_frame_t *frame, int i) +dht_blocking_entrylk_rec(call_frame_t *frame, int i) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - STACK_WIND_COOKIE (frame, dht_blocking_entrylk_cbk, - (void *) (long) i, - local->lock[0].ns.directory_ns.locks[i]->xl, - local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk, - local->lock[0].ns.directory_ns.locks[i]->domain, - &local->lock[0].ns.directory_ns.locks[i]->loc, - local->lock[0].ns.directory_ns.locks[i]->basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + STACK_WIND_COOKIE( + frame, dht_blocking_entrylk_cbk, (void *)(long)i, + local->lock[0].ns.directory_ns.locks[i]->xl, + local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk, + local->lock[0].ns.directory_ns.locks[i]->domain, + &local->lock[0].ns.directory_ns.locks[i]->loc, + local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_LOCK, + ENTRYLK_WRLCK, NULL); - return; + return; } int -dht_blocking_entrylk (call_frame_t *frame, dht_lock_t **lk_array, - 
int lk_count, fop_entrylk_cbk_t entrylk_cbk) +dht_blocking_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, + fop_entrylk_cbk_t entrylk_cbk) { - int ret = -1; - call_frame_t *lock_frame = NULL; - dht_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("dht-locks", frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out); - GF_VALIDATE_OR_GOTO (frame->this->name, entrylk_cbk, out); - - lock_frame = dht_lock_frame (frame); - if (lock_frame == NULL) - goto out; - - ret = dht_local_entrylk_init (lock_frame, lk_array, lk_count, - entrylk_cbk); - if (ret < 0) { - goto out; - } + int ret = -1; + call_frame_t *lock_frame = NULL; + dht_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO("dht-locks", frame, out); + GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out); + GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, out); - dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner); + lock_frame = dht_lock_frame(frame); + if (lock_frame == NULL) + goto out; - local = lock_frame->local; - local->main_frame = frame; + ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk); + if (ret < 0) { + goto out; + } - dht_blocking_entrylk_rec (lock_frame, 0); + dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner); - return 0; + local = lock_frame->local; + local->main_frame = frame; + + dht_blocking_entrylk_rec(lock_frame, 0); + + return 0; out: - if (lock_frame) - dht_lock_stack_destroy (lock_frame, DHT_ENTRYLK); + if (lock_frame) + dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK); - return -1; + return -1; } static int -dht_local_inodelk_init (call_frame_t *frame, dht_lock_t **lk_array, - int lk_count, fop_inodelk_cbk_t inodelk_cbk) +dht_local_inodelk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, + fop_inodelk_cbk_t inodelk_cbk) { - int ret = -1; - dht_local_t *local = NULL; + int ret = -1; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local == NULL) { - local = dht_local_init (frame, NULL, NULL, 0); - } + if (local == NULL) { + local = dht_local_init(frame, NULL, NULL, 0); + } - if (local == NULL) { - goto out; - } + if (local == NULL) { + goto out; + } - local->lock[0].layout.my_layout.inodelk_cbk = inodelk_cbk; - local->lock[0].layout.my_layout.locks = lk_array; - local->lock[0].layout.my_layout.lk_count = lk_count; + local->lock[0].layout.my_layout.inodelk_cbk = inodelk_cbk; + local->lock[0].layout.my_layout.locks = lk_array; + local->lock[0].layout.my_layout.lk_count = lk_count; - ret = dht_lock_order_requests (local->lock[0].layout.my_layout.locks, - local->lock[0].layout.my_layout.lk_count); - if (ret < 0) - goto out; + ret = dht_lock_order_requests(local->lock[0].layout.my_layout.locks, + local->lock[0].layout.my_layout.lk_count); + if (ret < 0) + goto out; - ret = 0; + ret = 0; out: - return ret; + return ret; } static void -dht_inodelk_done (call_frame_t *lock_frame) +dht_inodelk_done(call_frame_t *lock_frame) { - fop_inodelk_cbk_t inodelk_cbk = NULL; - call_frame_t *main_frame = NULL; - dht_local_t *local = NULL; + fop_inodelk_cbk_t inodelk_cbk = NULL; + call_frame_t *main_frame = NULL; + dht_local_t *local = NULL; - local = lock_frame->local; - main_frame = local->main_frame; + local = lock_frame->local; + main_frame = local->main_frame; - local->lock[0].layout.my_layout.locks = NULL; - local->lock[0].layout.my_layout.lk_count = 0; + local->lock[0].layout.my_layout.locks = NULL; + local->lock[0].layout.my_layout.lk_count = 0; - inodelk_cbk = 
local->lock[0].layout.my_layout.inodelk_cbk; - local->lock[0].layout.my_layout.inodelk_cbk = NULL; + inodelk_cbk = local->lock[0].layout.my_layout.inodelk_cbk; + local->lock[0].layout.my_layout.inodelk_cbk = NULL; - inodelk_cbk (main_frame, NULL, main_frame->this, - local->lock[0].layout.my_layout.op_ret, - local->lock[0].layout.my_layout.op_errno, NULL); + inodelk_cbk(main_frame, NULL, main_frame->this, + local->lock[0].layout.my_layout.op_ret, + local->lock[0].layout.my_layout.op_errno, NULL); - dht_lock_stack_destroy (lock_frame, DHT_INODELK); - return; + dht_lock_stack_destroy(lock_frame, DHT_INODELK); + return; } static int32_t -dht_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int lk_index = 0, call_cnt = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - lk_index = (long) cookie; - - local = frame->local; - if (op_ret < 0) { - uuid_utoa_r (local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid, - gfid); - - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_UNLOCKING_FAILED, - "unlocking failed on %s:%s", - local->lock[0].layout.my_layout.locks[lk_index]->xl->name, - gfid); - } else { - local->lock[0].layout.my_layout.locks[lk_index]->locked = 0; - } - - call_cnt = dht_frame_return (frame); - if (is_last_call (call_cnt)) { - dht_inodelk_done (frame); - } - - return 0; + dht_local_t *local = NULL; + int lk_index = 0, call_cnt = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + lk_index = (long)cookie; + + local = frame->local; + if (op_ret < 0) { + uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid, + gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED, + "unlocking failed on %s:%s", + local->lock[0].layout.my_layout.locks[lk_index]->xl->name, gfid); + } else { + local->lock[0].layout.my_layout.locks[lk_index]->locked = 0; + } + + call_cnt = dht_frame_return(frame); + if (is_last_call(call_cnt)) { + dht_inodelk_done(frame); + } + + return 0; } static int32_t -dht_unlock_inodelk_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - gf_uuid_unparse (local->lock[0].layout.my_layout.locks[0]->loc.inode->gfid, gfid); - - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "unlock failed on gfid: %s, stale lock might be left " - "in DHT_LAYOUT_HEAL_DOMAIN", gfid); - } - - DHT_STACK_DESTROY (frame); - return 0; + dht_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + gf_uuid_unparse(local->lock[0].layout.my_layout.locks[0]->loc.inode->gfid, + gfid); + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "unlock failed on gfid: %s, stale lock might be left " + "in DHT_LAYOUT_HEAL_DOMAIN", + gfid); + } + + DHT_STACK_DESTROY(frame); + return 0; } int32_t -dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count, - fop_inodelk_cbk_t inodelk_cbk) +dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, + fop_inodelk_cbk_t inodelk_cbk) { - dht_local_t *local = NULL; - struct 
gf_flock flock = {0,}; - int ret = -1 , i = 0; - call_frame_t *lock_frame = NULL; - int call_cnt = 0; - - GF_VALIDATE_OR_GOTO ("dht-locks", frame, done); - GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, done); - GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, done); - - call_cnt = dht_lock_count (lk_array, lk_count); - if (call_cnt == 0) { - ret = 0; - goto done; - } - - lock_frame = dht_lock_frame (frame); - if (lock_frame == NULL) { - gf_msg (frame->this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "cannot allocate a frame, not unlocking following " - "locks:"); - - dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array, - lk_count); - goto done; - } - - ret = dht_local_inodelk_init (lock_frame, lk_array, lk_count, - inodelk_cbk); - if (ret < 0) { - gf_msg (frame->this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "storing locks in local failed, not unlocking " - "following locks:"); - - dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array, - lk_count); - - goto done; - } - - local = lock_frame->local; - local->main_frame = frame; - local->call_cnt = call_cnt; - - flock.l_type = F_UNLCK; - - for (i = 0; i < local->lock[0].layout.my_layout.lk_count; i++) { - if (!local->lock[0].layout.my_layout.locks[i]->locked) - continue; - - lock_frame->root->lk_owner = local->lock[0].layout.my_layout.locks[i]->lk_owner; - STACK_WIND_COOKIE (lock_frame, dht_unlock_inodelk_cbk, - (void *)(long)i, - local->lock[0].layout.my_layout.locks[i]->xl, - local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk, - local->lock[0].layout.my_layout.locks[i]->domain, - &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, - &flock, NULL); - if (!--call_cnt) - break; - } - - return 0; + dht_local_t *local = NULL; + struct gf_flock flock = { + 0, + }; + int ret = -1, i = 0; + call_frame_t *lock_frame = NULL; + int call_cnt = 0; + + GF_VALIDATE_OR_GOTO("dht-locks", frame, done); + GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done); + GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, done); + + call_cnt = dht_lock_count(lk_array, lk_count); + if (call_cnt == 0) { + ret = 0; + goto done; + } + + lock_frame = dht_lock_frame(frame); + if (lock_frame == NULL) { + gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "cannot allocate a frame, not unlocking following " + "locks:"); + + dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); + goto done; + } + + ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk); + if (ret < 0) { + gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "storing locks in local failed, not unlocking " + "following locks:"); + + dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); + + goto done; + } + + local = lock_frame->local; + local->main_frame = frame; + local->call_cnt = call_cnt; + + flock.l_type = F_UNLCK; + + for (i = 0; i < local->lock[0].layout.my_layout.lk_count; i++) { + if (!local->lock[0].layout.my_layout.locks[i]->locked) + continue; + + lock_frame->root + ->lk_owner = local->lock[0].layout.my_layout.locks[i]->lk_owner; + STACK_WIND_COOKIE( + lock_frame, dht_unlock_inodelk_cbk, (void *)(long)i, + local->lock[0].layout.my_layout.locks[i]->xl, + local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk, + local->lock[0].layout.my_layout.locks[i]->domain, + &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock, + NULL); + if (!--call_cnt) + break; + } + + return 0; done: - if (lock_frame) - dht_lock_stack_destroy 
(lock_frame, DHT_INODELK); + if (lock_frame) + dht_lock_stack_destroy(lock_frame, DHT_INODELK); - /* no locks acquired, invoke inodelk_cbk */ - if (ret == 0) - inodelk_cbk (frame, NULL, frame->this, 0, 0, NULL); + /* no locks acquired, invoke inodelk_cbk */ + if (ret == 0) + inodelk_cbk(frame, NULL, frame->this, 0, 0, NULL); - return ret; + return ret; } int32_t -dht_unlock_inodelk_wrapper (call_frame_t *frame, dht_ilock_wrap_t *inodelk) +dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk) { - dht_local_t *local = NULL, *lock_local = NULL; - call_frame_t *lock_frame = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - int ret = 0; - - local = frame->local; - - if (!inodelk || !inodelk->locks) - goto out; - - gf_uuid_unparse (local->loc.parent->gfid, pgfid); - - lock_frame = copy_frame (frame); - if (lock_frame == NULL) { - gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "copy frame failed", pgfid, local->loc.name, - local->loc.path); - goto done; - } - - lock_local = dht_local_init (lock_frame, NULL, NULL, 0); - if (lock_local == NULL) { - gf_msg (frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "local creation failed", pgfid, local->loc.name, - local->loc.path); - goto done; - } - - lock_frame->local = lock_local; - - lock_local->lock[0].layout.my_layout.locks = inodelk->locks; - lock_local->lock[0].layout.my_layout.lk_count = inodelk->lk_count; - inodelk->locks = NULL; - inodelk->lk_count = 0; - - ret = dht_unlock_inodelk (lock_frame, - lock_local->lock[0].layout.my_layout.locks, - lock_local->lock[0].layout.my_layout.lk_count, - dht_unlock_inodelk_done); - - if (ret) - goto done; - - lock_frame = NULL; + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + + local = frame->local; + + if (!inodelk || !inodelk->locks) + goto out; + + gf_uuid_unparse(local->loc.parent->gfid, pgfid); + + lock_frame = copy_frame(frame); + if (lock_frame == NULL) { + gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "copy frame failed", + pgfid, local->loc.name, local->loc.path); + goto done; + } + + lock_local = dht_local_init(lock_frame, NULL, NULL, 0); + if (lock_local == NULL) { + gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "mkdir (%s/%s) (path: %s): " + "local creation failed", + pgfid, local->loc.name, local->loc.path); + goto done; + } + + lock_frame->local = lock_local; + + lock_local->lock[0].layout.my_layout.locks = inodelk->locks; + lock_local->lock[0].layout.my_layout.lk_count = inodelk->lk_count; + inodelk->locks = NULL; + inodelk->lk_count = 0; + + ret = dht_unlock_inodelk( + lock_frame, lock_local->lock[0].layout.my_layout.locks, + lock_local->lock[0].layout.my_layout.lk_count, dht_unlock_inodelk_done); + + if (ret) + goto done; + + lock_frame = NULL; done: - if (lock_frame != NULL) { - DHT_STACK_DESTROY (lock_frame); - } + if (lock_frame != NULL) { + DHT_STACK_DESTROY(lock_frame); + } out: - return 0; + return 0; } static int -dht_inodelk_cleanup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) +dht_inodelk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_inodelk_done (frame); - return 0; + dht_inodelk_done(frame); + return 0; } 
static void -dht_inodelk_cleanup (call_frame_t *lock_frame) +dht_inodelk_cleanup(call_frame_t *lock_frame) { - dht_lock_t **lk_array = NULL; - int lk_count = 0, lk_acquired = 0; - dht_local_t *local = NULL; + dht_lock_t **lk_array = NULL; + int lk_count = 0, lk_acquired = 0; + dht_local_t *local = NULL; - local = lock_frame->local; + local = lock_frame->local; - lk_array = local->lock[0].layout.my_layout.locks; - lk_count = local->lock[0].layout.my_layout.lk_count; + lk_array = local->lock[0].layout.my_layout.locks; + lk_count = local->lock[0].layout.my_layout.lk_count; - lk_acquired = dht_lock_count (lk_array, lk_count); - if (lk_acquired != 0) { - dht_unlock_inodelk (lock_frame, lk_array, lk_count, - dht_inodelk_cleanup_cbk); - } else { - dht_inodelk_done (lock_frame); - } + lk_acquired = dht_lock_count(lk_array, lk_count); + if (lk_acquired != 0) { + dht_unlock_inodelk(lock_frame, lk_array, lk_count, + dht_inodelk_cleanup_cbk); + } else { + dht_inodelk_done(lock_frame); + } - return; + return; } static int32_t -dht_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int lk_index = 0, call_cnt = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; + dht_local_t *local = NULL; + int lk_index = 0, call_cnt = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; - local = frame->local; - lk_index = (long) cookie; + local = frame->local; + lk_index = (long)cookie; - if (op_ret == -1) { - local->lock[0].layout.my_layout.op_ret = -1; - local->lock[0].layout.my_layout.op_errno = op_errno; - - if (local && local->lock[0].layout.my_layout.locks[lk_index]) { - uuid_utoa_r (local->lock[0].layout.my_layout.locks[lk_index]->loc.inode->gfid, - gfid); + if (op_ret == -1) { + local->lock[0].layout.my_layout.op_ret = -1; + local->lock[0].layout.my_layout.op_errno = op_errno; - gf_msg_debug (this->name, op_errno, - "inodelk failed on gfid: %s " - "subvolume: %s", gfid, - local->lock[0].layout.my_layout.locks[lk_index]->xl->name); - } + if (local && local->lock[0].layout.my_layout.locks[lk_index]) { + uuid_utoa_r(local->lock[0] + .layout.my_layout.locks[lk_index] + ->loc.inode->gfid, + gfid); - goto out; + gf_msg_debug( + this->name, op_errno, + "inodelk failed on gfid: %s " + "subvolume: %s", + gfid, + local->lock[0].layout.my_layout.locks[lk_index]->xl->name); } - local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true; + goto out; + } -out: - call_cnt = dht_frame_return (frame); - if (is_last_call (call_cnt)) { - if (local->lock[0].layout.my_layout.op_ret < 0) { - dht_inodelk_cleanup (frame); - return 0; - } + local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true; - dht_inodelk_done (frame); +out: + call_cnt = dht_frame_return(frame); + if (is_last_call(call_cnt)) { + if (local->lock[0].layout.my_layout.op_ret < 0) { + dht_inodelk_cleanup(frame); + return 0; } - return 0; + dht_inodelk_done(frame); + } + + return 0; } int -dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array, - int lk_count, fop_inodelk_cbk_t inodelk_cbk) +dht_nonblocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, + int lk_count, fop_inodelk_cbk_t inodelk_cbk) { - struct gf_flock flock = {0,}; - int i = 0, ret = 0; - dht_local_t *local = NULL; - call_frame_t *lock_frame = NULL; - - GF_VALIDATE_OR_GOTO ("dht-locks", frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, 
out); - GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out); - - lock_frame = dht_lock_frame (frame); - if (lock_frame == NULL) - goto out; - - ret = dht_local_inodelk_init (lock_frame, lk_array, lk_count, - inodelk_cbk); - if (ret < 0) { - goto out; - } + struct gf_flock flock = { + 0, + }; + int i = 0, ret = 0; + dht_local_t *local = NULL; + call_frame_t *lock_frame = NULL; - dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner); + GF_VALIDATE_OR_GOTO("dht-locks", frame, out); + GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out); + GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out); - local = lock_frame->local; - local->main_frame = frame; + lock_frame = dht_lock_frame(frame); + if (lock_frame == NULL) + goto out; - local->call_cnt = lk_count; + ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk); + if (ret < 0) { + goto out; + } - for (i = 0; i < lk_count; i++) { - flock.l_type = local->lock[0].layout.my_layout.locks[i]->type; + dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner); - STACK_WIND_COOKIE (lock_frame, dht_nonblocking_inodelk_cbk, - (void *) (long) i, - local->lock[0].layout.my_layout.locks[i]->xl, - local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk, - local->lock[0].layout.my_layout.locks[i]->domain, - &local->lock[0].layout.my_layout.locks[i]->loc, - F_SETLK, - &flock, NULL); - } + local = lock_frame->local; + local->main_frame = frame; + + local->call_cnt = lk_count; + + for (i = 0; i < lk_count; i++) { + flock.l_type = local->lock[0].layout.my_layout.locks[i]->type; + + STACK_WIND_COOKIE( + lock_frame, dht_nonblocking_inodelk_cbk, (void *)(long)i, + local->lock[0].layout.my_layout.locks[i]->xl, + local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk, + local->lock[0].layout.my_layout.locks[i]->domain, + &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock, + NULL); + } - return 0; + return 0; out: - if (lock_frame) - dht_lock_stack_destroy (lock_frame, DHT_INODELK); + if (lock_frame) + dht_lock_stack_destroy(lock_frame, DHT_INODELK); - return -1; + return -1; } static int32_t -dht_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int lk_index = 0; - int i = 0; - dht_local_t *local = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0,}; - dht_reaction_type_t reaction = 0; - - lk_index = (long) cookie; - - local = frame->local; - if (op_ret == 0) { - local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true; - } else { - switch (op_errno) { - case ESTALE: - case ENOENT: - reaction = local->lock[0].layout.my_layout.locks[lk_index]->do_on_failure; - if ((reaction != IGNORE_ENOENT_ESTALE) && - (reaction != IGNORE_ENOENT_ESTALE_EIO)) { - gf_uuid_unparse (local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid, gfid); - local->lock[0].layout.my_layout.op_ret = -1; - local->lock[0].layout.my_layout.op_errno = op_errno; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_INODELK_FAILED, - "inodelk failed on subvol %s. 
gfid:%s", - local->lock[0].layout.my_layout.locks[lk_index]->xl->name, - gfid); - goto cleanup; - } - break; - case EIO: - reaction = local->lock[0].layout.my_layout.locks[lk_index]->do_on_failure; - if (reaction != IGNORE_ENOENT_ESTALE_EIO) { - gf_uuid_unparse (local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid, gfid); - local->lock[0].layout.my_layout.op_ret = -1; - local->lock[0].layout.my_layout.op_errno = op_errno; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_INODELK_FAILED, - "inodelk failed on subvol %s. gfid:%s", - local->lock[0].layout.my_layout.locks[lk_index]->xl->name, - gfid); - goto cleanup; - } - break; - - default: - gf_uuid_unparse (local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid, gfid); - local->lock[0].layout.my_layout.op_ret = -1; - local->lock[0].layout.my_layout.op_errno = op_errno; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_INODELK_FAILED, - "inodelk failed on subvol %s, gfid:%s", - local->lock[0].layout.my_layout.locks[lk_index]->xl->name, gfid); - goto cleanup; + int lk_index = 0; + int i = 0; + dht_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; + dht_reaction_type_t reaction = 0; + + lk_index = (long)cookie; + + local = frame->local; + if (op_ret == 0) { + local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true; + } else { + switch (op_errno) { + case ESTALE: + case ENOENT: + reaction = local->lock[0] + .layout.my_layout.locks[lk_index] + ->do_on_failure; + if ((reaction != IGNORE_ENOENT_ESTALE) && + (reaction != IGNORE_ENOENT_ESTALE_EIO)) { + gf_uuid_unparse(local->lock[0] + .layout.my_layout.locks[lk_index] + ->loc.gfid, + gfid); + local->lock[0].layout.my_layout.op_ret = -1; + local->lock[0].layout.my_layout.op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_INODELK_FAILED, + "inodelk failed on subvol %s. gfid:%s", + local->lock[0] + .layout.my_layout.locks[lk_index] + ->xl->name, + gfid); + goto cleanup; } - } + break; + case EIO: + reaction = local->lock[0] + .layout.my_layout.locks[lk_index] + ->do_on_failure; + if (reaction != IGNORE_ENOENT_ESTALE_EIO) { + gf_uuid_unparse(local->lock[0] + .layout.my_layout.locks[lk_index] + ->loc.gfid, + gfid); + local->lock[0].layout.my_layout.op_ret = -1; + local->lock[0].layout.my_layout.op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_INODELK_FAILED, + "inodelk failed on subvol %s. 
gfid:%s", + local->lock[0] + .layout.my_layout.locks[lk_index] + ->xl->name, + gfid); + goto cleanup; + } + break; - if (lk_index == (local->lock[0].layout.my_layout.lk_count - 1)) { - for (i = 0; (i < local->lock[0].layout.my_layout.lk_count) && - (!local->lock[0].layout.my_layout.locks[i]->locked); i++) - ; + default: + gf_uuid_unparse( + local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid, + gfid); + local->lock[0].layout.my_layout.op_ret = -1; + local->lock[0].layout.my_layout.op_errno = op_errno; + gf_msg( + this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED, + "inodelk failed on subvol %s, gfid:%s", + local->lock[0].layout.my_layout.locks[lk_index]->xl->name, + gfid); + goto cleanup; + } + } - if (i == local->lock[0].layout.my_layout.lk_count) { - local->lock[0].layout.my_layout.op_ret = -1; - local->lock[0].layout.my_layout.op_errno = op_errno; - } + if (lk_index == (local->lock[0].layout.my_layout.lk_count - 1)) { + for (i = 0; (i < local->lock[0].layout.my_layout.lk_count) && + (!local->lock[0].layout.my_layout.locks[i]->locked); + i++) + ; - dht_inodelk_done (frame); - } else { - dht_blocking_inodelk_rec (frame, ++lk_index); + if (i == local->lock[0].layout.my_layout.lk_count) { + local->lock[0].layout.my_layout.op_ret = -1; + local->lock[0].layout.my_layout.op_errno = op_errno; } - return 0; + dht_inodelk_done(frame); + } else { + dht_blocking_inodelk_rec(frame, ++lk_index); + } + + return 0; cleanup: - dht_inodelk_cleanup (frame); + dht_inodelk_cleanup(frame); - return 0; + return 0; } void -dht_blocking_inodelk_rec (call_frame_t *frame, int i) +dht_blocking_inodelk_rec(call_frame_t *frame, int i) { - dht_local_t *local = NULL; - struct gf_flock flock = {0,}; + dht_local_t *local = NULL; + struct gf_flock flock = { + 0, + }; - local = frame->local; + local = frame->local; - flock.l_type = local->lock[0].layout.my_layout.locks[i]->type; + flock.l_type = local->lock[0].layout.my_layout.locks[i]->type; - STACK_WIND_COOKIE (frame, dht_blocking_inodelk_cbk, - (void *) (long) i, - local->lock[0].layout.my_layout.locks[i]->xl, - local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk, - local->lock[0].layout.my_layout.locks[i]->domain, - &local->lock[0].layout.my_layout.locks[i]->loc, - F_SETLKW, - &flock, NULL); + STACK_WIND_COOKIE( + frame, dht_blocking_inodelk_cbk, (void *)(long)i, + local->lock[0].layout.my_layout.locks[i]->xl, + local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk, + local->lock[0].layout.my_layout.locks[i]->domain, + &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLKW, &flock, NULL); - return; + return; } int -dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array, - int lk_count, fop_inodelk_cbk_t inodelk_cbk) +dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, + fop_inodelk_cbk_t inodelk_cbk) { - int ret = -1; - call_frame_t *lock_frame = NULL; - dht_local_t *local = NULL; - dht_local_t *tmp_local = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0,}; - - GF_VALIDATE_OR_GOTO ("dht-locks", frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out); - GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out); - - tmp_local = frame->local; - - lock_frame = dht_lock_frame (frame); - if (lock_frame == NULL) { - gf_uuid_unparse (tmp_local->loc.gfid, gfid); - gf_msg ("dht", GF_LOG_ERROR, ENOMEM, - DHT_MSG_LOCK_FRAME_FAILED, - "memory allocation failed for lock_frame. 
gfid:%s" - " path:%s", gfid, tmp_local->loc.path); - goto out; - } - - ret = dht_local_inodelk_init (lock_frame, lk_array, lk_count, - inodelk_cbk); - if (ret < 0) { - gf_uuid_unparse (tmp_local->loc.gfid, gfid); - gf_msg ("dht", GF_LOG_ERROR, ENOMEM, - DHT_MSG_LOCAL_LOCK_INIT_FAILED, - "dht_local_lock_init failed, gfid: %s path:%s", gfid, - tmp_local->loc.path); - goto out; - } - - dht_set_lkowner (lk_array, lk_count, &lock_frame->root->lk_owner); - - local = lock_frame->local; - local->main_frame = frame; - - dht_blocking_inodelk_rec (lock_frame, 0); - - return 0; + int ret = -1; + call_frame_t *lock_frame = NULL; + dht_local_t *local = NULL; + dht_local_t *tmp_local = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("dht-locks", frame, out); + GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out); + GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out); + + tmp_local = frame->local; + + lock_frame = dht_lock_frame(frame); + if (lock_frame == NULL) { + gf_uuid_unparse(tmp_local->loc.gfid, gfid); + gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED, + "memory allocation failed for lock_frame. gfid:%s" + " path:%s", + gfid, tmp_local->loc.path); + goto out; + } + + ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk); + if (ret < 0) { + gf_uuid_unparse(tmp_local->loc.gfid, gfid); + gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED, + "dht_local_lock_init failed, gfid: %s path:%s", gfid, + tmp_local->loc.path); + goto out; + } + + dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner); + + local = lock_frame->local; + local->main_frame = frame; + + dht_blocking_inodelk_rec(lock_frame, 0); + + return 0; out: - if (lock_frame) - dht_lock_stack_destroy (lock_frame, DHT_INODELK); + if (lock_frame) + dht_lock_stack_destroy(lock_frame, DHT_INODELK); - return -1; + return -1; } void -dht_unlock_namespace (call_frame_t *frame, dht_dir_transaction_t *lock) +dht_unlock_namespace(call_frame_t *frame, dht_dir_transaction_t *lock) { - GF_VALIDATE_OR_GOTO ("dht-locks", frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, lock, out); + GF_VALIDATE_OR_GOTO("dht-locks", frame, out); + GF_VALIDATE_OR_GOTO(frame->this->name, lock, out); - dht_unlock_entrylk_wrapper (frame, &lock->ns.directory_ns); - dht_unlock_inodelk_wrapper (frame, &lock->ns.parent_layout); + dht_unlock_entrylk_wrapper(frame, &lock->ns.directory_ns); + dht_unlock_inodelk_wrapper(frame, &lock->ns.parent_layout); out: - return; + return; } static int32_t -dht_protect_namespace_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_protect_namespace_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; - if (op_ret != 0) - dht_unlock_inodelk_wrapper (frame, - &local->current->ns.parent_layout); + local = frame->local; + if (op_ret != 0) + dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout); - local->current->ns.ns_cbk (frame, cookie, this, op_ret, op_errno, - xdata); - return 0; + local->current->ns.ns_cbk(frame, cookie, this, op_ret, op_errno, xdata); + return 0; } int32_t -dht_blocking_entrylk_after_inodelk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) 
{ - dht_local_t *local = NULL; - int ret = -1; - loc_t *loc = NULL; - dht_lock_t **lk_array = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - int count = 0; - dht_elock_wrap_t *entrylk = NULL; - - local = frame->local; - entrylk = &local->current->ns.directory_ns; - - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = op_errno; - goto err; - } - - loc = &entrylk->locks[0]->loc; - gf_uuid_unparse (loc->gfid, pgfid); - - local->op_ret = 0; - lk_array = entrylk->locks; - count = entrylk->lk_count; - - ret = dht_blocking_entrylk (frame, lk_array, count, - dht_protect_namespace_cbk); - - if (ret < 0) { - local->op_ret = -1; - local->op_errno = EIO; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_ENTRYLK_ERROR, - "%s (%s/%s): " - "dht_blocking_entrylk failed after taking inodelk", - gf_fop_list[local->fop], pgfid, - entrylk->locks[0]->basename); - goto err; - } - - return 0; + dht_local_t *local = NULL; + int ret = -1; + loc_t *loc = NULL; + dht_lock_t **lk_array = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int count = 0; + dht_elock_wrap_t *entrylk = NULL; + + local = frame->local; + entrylk = &local->current->ns.directory_ns; + + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } + + loc = &entrylk->locks[0]->loc; + gf_uuid_unparse(loc->gfid, pgfid); + + local->op_ret = 0; + lk_array = entrylk->locks; + count = entrylk->lk_count; + + ret = dht_blocking_entrylk(frame, lk_array, count, + dht_protect_namespace_cbk); + + if (ret < 0) { + local->op_ret = -1; + local->op_errno = EIO; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_ENTRYLK_ERROR, + "%s (%s/%s): " + "dht_blocking_entrylk failed after taking inodelk", + gf_fop_list[local->fop], pgfid, entrylk->locks[0]->basename); + goto err; + } + + return 0; err: - if (lk_array != NULL) { - dht_lock_array_free (lk_array, count); - GF_FREE (lk_array); - entrylk->locks = NULL; - entrylk->lk_count = 0; - } - - /* Unlock inodelk. No harm calling unlock twice */ - dht_unlock_inodelk_wrapper (frame, &local->current->ns.parent_layout); - /* Call ns_cbk. It will take care of unwinding */ - local->current->ns.ns_cbk (frame, NULL, this, local->op_ret, - local->op_errno, NULL); - return 0; + if (lk_array != NULL) { + dht_lock_array_free(lk_array, count); + GF_FREE(lk_array); + entrylk->locks = NULL; + entrylk->lk_count = 0; + } + + /* Unlock inodelk. No harm calling unlock twice */ + dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout); + /* Call ns_cbk. It will take care of unwinding */ + local->current->ns.ns_cbk(frame, NULL, this, local->op_ret, local->op_errno, + NULL); + return 0; } /* Given the loc and the subvol, this routine takes the inodelk on @@ -1265,137 +1278,131 @@ err: * and then entrylk serially. 
*/ int -dht_protect_namespace (call_frame_t *frame, loc_t *loc, - xlator_t *subvol, - struct dht_namespace *ns, - fop_entrylk_cbk_t ns_cbk) +dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol, + struct dht_namespace *ns, fop_entrylk_cbk_t ns_cbk) { - dht_ilock_wrap_t *inodelk = NULL; - dht_elock_wrap_t *entrylk = NULL; - dht_lock_t **lk_array = NULL; - dht_local_t *local = NULL; - xlator_t *this = NULL; - loc_t parent = {0,}; - int ret = -1; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - int32_t op_errno = 0; - int count = 1; - - GF_VALIDATE_OR_GOTO ("dht-locks", frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, out); - GF_VALIDATE_OR_GOTO (frame->this->name, loc->parent, out); - GF_VALIDATE_OR_GOTO (frame->this->name, subvol, out); - - local = frame->local; - this = frame->this; - - inodelk = &ns->parent_layout; - entrylk = &ns->directory_ns; - - /* Initialize entrylk_cbk and parent loc */ - ns->ns_cbk = ns_cbk; - - ret = dht_build_parent_loc (this, &parent, loc, &op_errno); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_LOC_FAILED, "gfid:%s (name:%s) (path: %s): " - "parent loc build failed", loc->gfid, loc->name, - loc->path); - goto out; - } - gf_uuid_unparse (parent.gfid, pgfid); - - /* Alloc inodelk */ - inodelk->locks = GF_CALLOC (count, sizeof (*lk_array), - gf_common_mt_pointer); - if (inodelk->locks == NULL) { - local->op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "calloc failure", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); - goto out; - } - - inodelk->locks[0] = dht_lock_new (this, subvol, &parent, F_RDLCK, - DHT_LAYOUT_HEAL_DOMAIN, NULL, - FAIL_ON_ANY_ERROR); - if (inodelk->locks[0] == NULL) { - local->op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "inodelk: lock allocation failed", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); - goto err; - } - inodelk->lk_count = count; - - /* Allock entrylk */ - entrylk->locks = GF_CALLOC (count, sizeof (*lk_array), - gf_common_mt_pointer); - if (entrylk->locks == NULL) { - local->op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "entrylk: calloc failure", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); - - goto err; - } - - entrylk->locks[0] = dht_lock_new (this, subvol, &parent, F_WRLCK, - DHT_ENTRY_SYNC_DOMAIN, loc->name, - FAIL_ON_ANY_ERROR); - if (entrylk->locks[0] == NULL) { - local->op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "entrylk: lock allocation failed", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); - - goto err; - } - entrylk->lk_count = count; - - /* Take read inodelk on parent. If it is successful, take write entrylk - * on name in cbk. 
- */ - lk_array = inodelk->locks; - ret = dht_blocking_inodelk (frame, lk_array, count, - dht_blocking_entrylk_after_inodelk); - if (ret < 0) { - local->op_errno = EIO; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_INODELK_ERROR, - "%s (%s/%s) (path: %s): " - "dht_blocking_inodelk failed", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); - goto err; - } - - loc_wipe (&parent); - - return 0; + dht_ilock_wrap_t *inodelk = NULL; + dht_elock_wrap_t *entrylk = NULL; + dht_lock_t **lk_array = NULL; + dht_local_t *local = NULL; + xlator_t *this = NULL; + loc_t parent = { + 0, + }; + int ret = -1; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int32_t op_errno = 0; + int count = 1; + + GF_VALIDATE_OR_GOTO("dht-locks", frame, out); + GF_VALIDATE_OR_GOTO(frame->this->name, loc, out); + GF_VALIDATE_OR_GOTO(frame->this->name, loc->parent, out); + GF_VALIDATE_OR_GOTO(frame->this->name, subvol, out); + + local = frame->local; + this = frame->this; + + inodelk = &ns->parent_layout; + entrylk = &ns->directory_ns; + + /* Initialize entrylk_cbk and parent loc */ + ns->ns_cbk = ns_cbk; + + ret = dht_build_parent_loc(this, &parent, loc, &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED, + "gfid:%s (name:%s) (path: %s): " + "parent loc build failed", + loc->gfid, loc->name, loc->path); + goto out; + } + gf_uuid_unparse(parent.gfid, pgfid); + + /* Alloc inodelk */ + inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); + if (inodelk->locks == NULL) { + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, + "%s (%s/%s) (path: %s): " + "calloc failure", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto out; + } + + inodelk->locks[0] = dht_lock_new(this, subvol, &parent, F_RDLCK, + DHT_LAYOUT_HEAL_DOMAIN, NULL, + FAIL_ON_ANY_ERROR); + if (inodelk->locks[0] == NULL) { + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, + "%s (%s/%s) (path: %s): " + "inodelk: lock allocation failed", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + inodelk->lk_count = count; + + /* Allock entrylk */ + entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); + if (entrylk->locks == NULL) { + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, + "%s (%s/%s) (path: %s): " + "entrylk: calloc failure", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + + goto err; + } + + entrylk->locks[0] = dht_lock_new(this, subvol, &parent, F_WRLCK, + DHT_ENTRY_SYNC_DOMAIN, loc->name, + FAIL_ON_ANY_ERROR); + if (entrylk->locks[0] == NULL) { + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, + "%s (%s/%s) (path: %s): " + "entrylk: lock allocation failed", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + + goto err; + } + entrylk->lk_count = count; + + /* Take read inodelk on parent. If it is successful, take write entrylk + * on name in cbk. 
+ */ + lk_array = inodelk->locks; + ret = dht_blocking_inodelk(frame, lk_array, count, + dht_blocking_entrylk_after_inodelk); + if (ret < 0) { + local->op_errno = EIO; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_INODELK_ERROR, + "%s (%s/%s) (path: %s): " + "dht_blocking_inodelk failed", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + + loc_wipe(&parent); + + return 0; err: - if (entrylk->locks != NULL) { - dht_lock_array_free (entrylk->locks, count); - GF_FREE (entrylk->locks); - entrylk->locks = NULL; - entrylk->lk_count = 0; - } + if (entrylk->locks != NULL) { + dht_lock_array_free(entrylk->locks, count); + GF_FREE(entrylk->locks); + entrylk->locks = NULL; + entrylk->lk_count = 0; + } - if (inodelk->locks != NULL) { - dht_lock_array_free (inodelk->locks, count); - GF_FREE (inodelk->locks); - inodelk->locks = NULL; - inodelk->lk_count = 0; - } + if (inodelk->locks != NULL) { + dht_lock_array_free(inodelk->locks, count); + GF_FREE(inodelk->locks); + inodelk->locks = NULL; + inodelk->lk_count = 0; + } - loc_wipe (&parent); + loc_wipe(&parent); out: - return -1; + return -1; } diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 9983429acec..7a2539dc5f5 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "tier.h" #include "dht-common.h" #include "xlator.h" @@ -18,244 +17,230 @@ #include #include "events.h" -#define GF_DISK_SECTOR_SIZE 512 -#define DHT_REBALANCE_PID 4242 /* Change it if required */ -#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ -#define MAX_MIGRATE_QUEUE_COUNT 500 -#define MIN_MIGRATE_QUEUE_COUNT 200 -#define MAX_REBAL_TYPE_SIZE 16 -#define FILE_CNT_INTERVAL 600 /* 10 mins */ -#define ESTIMATE_START_INTERVAL 600 /* 10 mins */ -#define HARDLINK_MIG_INPROGRESS -2 -#define SKIP_MIGRATION_FD_POSITIVE -3 +#define GF_DISK_SECTOR_SIZE 512 +#define DHT_REBALANCE_PID 4242 /* Change it if required */ +#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ +#define MAX_MIGRATE_QUEUE_COUNT 500 +#define MIN_MIGRATE_QUEUE_COUNT 200 +#define MAX_REBAL_TYPE_SIZE 16 +#define FILE_CNT_INTERVAL 600 /* 10 mins */ +#define ESTIMATE_START_INTERVAL 600 /* 10 mins */ +#define HARDLINK_MIG_INPROGRESS -2 +#define SKIP_MIGRATION_FD_POSITIVE -3 #ifndef MAX -#define MAX(a, b) (((a) > (b))?(a):(b)) +#define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) #endif - -#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) { \ - idx++; \ - idx %= sv_cnt; \ - } +#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) \ + { \ + idx++; \ + idx %= sv_cnt; \ + } uint64_t g_totalfiles = 0; uint64_t g_totalsize = 0; - void -gf_defrag_free_dir_dfmeta (struct dir_dfmeta *meta, int local_subvols_cnt) +gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) { - int i = 0; + int i = 0; - if (meta) { - for (i = 0; i < local_subvols_cnt; i++) { - gf_dirent_free (&meta->equeue[i]); - } - - GF_FREE (meta->equeue); - GF_FREE (meta->head); - GF_FREE (meta->iterator); - GF_FREE (meta->offset_var); - GF_FREE (meta->fetch_entries); - GF_FREE (meta); + if (meta) { + for (i = 0; i < local_subvols_cnt; i++) { + gf_dirent_free(&meta->equeue[i]); } + + GF_FREE(meta->equeue); + GF_FREE(meta->head); + GF_FREE(meta->iterator); + GF_FREE(meta->offset_var); + GF_FREE(meta->fetch_entries); + GF_FREE(meta); + } } void -gf_defrag_free_container (struct dht_container *container) +gf_defrag_free_container(struct dht_container *container) { - if (container) { - gf_dirent_entry_free (container->df_entry); + if (container) { + gf_dirent_entry_free(container->df_entry); - if (container->parent_loc) { - loc_wipe (container->parent_loc); - } + if (container->parent_loc) { + loc_wipe(container->parent_loc); + } - GF_FREE (container->parent_loc); + GF_FREE(container->parent_loc); - GF_FREE (container); - } + GF_FREE(container); + } } void -dht_set_global_defrag_error (gf_defrag_info_t *defrag, int ret) +dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret) { - LOCK (&defrag->lock); - { - defrag->global_error = ret; - } - UNLOCK (&defrag->lock); - return; + LOCK(&defrag->lock); + { + defrag->global_error = ret; + } + UNLOCK(&defrag->lock); + return; } - static gf_boolean_t -dht_is_tier_command (int cmd) { - - gf_boolean_t is_tier = _gf_false; +dht_is_tier_command(int cmd) +{ + gf_boolean_t is_tier = _gf_false; - switch (cmd) { + switch (cmd) { case GF_DEFRAG_CMD_START_TIER: case GF_DEFRAG_CMD_STATUS_TIER: case GF_DEFRAG_CMD_START_DETACH_TIER: case GF_DEFRAG_CMD_STOP_DETACH_TIER: case GF_DEFRAG_CMD_PAUSE_TIER: case GF_DEFRAG_CMD_RESUME_TIER: - is_tier = _gf_true; - break; + is_tier = _gf_true; + break; default: - break; - } - return is_tier; - + break; + } + return is_tier; } - static int -dht_send_rebalance_event (xlator_t *this, int cmd, gf_defrag_status_t status) +dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) { - int ret = -1; - char *volname = NULL; - char *tmpstr = NULL; - char *ptr = NULL; - char *suffix = "-dht"; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - int len = 0; - - eventtypes_t event = EVENT_LAST; - - switch (status) { + int ret = -1; + char *volname = NULL; + char *tmpstr = NULL; + char *ptr = NULL; + char *suffix = "-dht"; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + int len = 0; + + eventtypes_t event = EVENT_LAST; + + switch (status) { case GF_DEFRAG_STATUS_COMPLETE: - event = EVENT_VOLUME_REBALANCE_COMPLETE; - break; + event = EVENT_VOLUME_REBALANCE_COMPLETE; + break; case GF_DEFRAG_STATUS_FAILED: - event = EVENT_VOLUME_REBALANCE_FAILED; - break; + event = EVENT_VOLUME_REBALANCE_FAILED; + break; case GF_DEFRAG_STATUS_STOPPED: - event = EVENT_VOLUME_REBALANCE_STOP; - break; + event = EVENT_VOLUME_REBALANCE_STOP; + break; default: - break; - - } - - if (dht_is_tier_command (cmd)) { - /* We should have the tier volume name*/ - conf = this->private; - defrag = conf->defrag; - volname = 
defrag->tier_conf.volname; - } else { - /* DHT volume */ - len = strlen (this->name) - strlen (suffix); - tmpstr = gf_strdup (this->name); - if (tmpstr) { - ptr = tmpstr + len; - if (!strcmp (ptr, suffix)) { - tmpstr[len] = '\0'; - volname = tmpstr; - } - } - } - - if (!volname) { - /* Better than nothing */ - volname = this->name; - } + break; + } - if (event != EVENT_LAST) { - gf_event (event, "volume=%s", volname); - } - - GF_FREE (tmpstr); - return ret; + if (dht_is_tier_command(cmd)) { + /* We should have the tier volume name*/ + conf = this->private; + defrag = conf->defrag; + volname = defrag->tier_conf.volname; + } else { + /* DHT volume */ + len = strlen(this->name) - strlen(suffix); + tmpstr = gf_strdup(this->name); + if (tmpstr) { + ptr = tmpstr + len; + if (!strcmp(ptr, suffix)) { + tmpstr[len] = '\0'; + volname = tmpstr; + } + } + } + + if (!volname) { + /* Better than nothing */ + volname = this->name; + } + + if (event != EVENT_LAST) { + gf_event(event, "volume=%s", volname); + } + + GF_FREE(tmpstr); + return ret; } - static void -dht_strip_out_acls (dict_t *dict) +dht_strip_out_acls(dict_t *dict) { - if (dict) { - dict_del (dict, "trusted.SGI_ACL_FILE"); - dict_del (dict, POSIX_ACL_ACCESS_XATTR); - } + if (dict) { + dict_del(dict, "trusted.SGI_ACL_FILE"); + dict_del(dict, POSIX_ACL_ACCESS_XATTR); + } } - - static int -dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count, - int32_t size, off_t offset, struct iobref *iobref, - int *fop_errno) +dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count, + int32_t size, off_t offset, struct iobref *iobref, + int *fop_errno) { - int i = 0; - int ret = -1; - int start_idx = 0; - int tmp_offset = 0; - int write_needed = 0; - int buf_len = 0; - int size_pending = 0; - char *buf = NULL; - - /* loop through each vector */ - for (i = 0; i < count; i++) { - buf = vec[i].iov_base; - buf_len = vec[i].iov_len; - - for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; - start_idx += GF_DISK_SECTOR_SIZE) { - - if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { - write_needed = 1; - continue; - } - - if (write_needed) { - ret = syncop_write (to, fd, (buf + tmp_offset), - (start_idx - tmp_offset), - (offset + tmp_offset), - iobref, 0, NULL, NULL); - /* 'path' will be logged in calling function */ - if (ret < 0) { - gf_log (THIS->name, GF_LOG_WARNING, - "failed to write (%s)", - strerror (-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - - write_needed = 0; - } - tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; + int i = 0; + int ret = -1; + int start_idx = 0; + int tmp_offset = 0; + int write_needed = 0; + int buf_len = 0; + int size_pending = 0; + char *buf = NULL; + + /* loop through each vector */ + for (i = 0; i < count; i++) { + buf = vec[i].iov_base; + buf_len = vec[i].iov_len; + + for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; + start_idx += GF_DISK_SECTOR_SIZE) { + if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { + write_needed = 1; + continue; + } + + if (write_needed) { + ret = syncop_write( + to, fd, (buf + tmp_offset), (start_idx - tmp_offset), + (offset + tmp_offset), iobref, 0, NULL, NULL); + /* 'path' will be logged in calling function */ + if (ret < 0) { + gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", + strerror(-ret)); + *fop_errno = -ret; + ret = -1; + goto out; } - if ((start_idx < buf_len) || write_needed) { - /* This means, last chunk is not yet written.. 
write it */ - ret = syncop_write (to, fd, (buf + tmp_offset), - (buf_len - tmp_offset), - (offset + tmp_offset), iobref, 0, - NULL, NULL); - if (ret < 0) { - /* 'path' will be logged in calling function */ - gf_log (THIS->name, GF_LOG_WARNING, - "failed to write (%s)", - strerror (-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - } + write_needed = 0; + } + tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; + } - size_pending = (size - buf_len); - if (!size_pending) - break; + if ((start_idx < buf_len) || write_needed) { + /* This means, last chunk is not yet written.. write it */ + ret = syncop_write(to, fd, (buf + tmp_offset), + (buf_len - tmp_offset), (offset + tmp_offset), + iobref, 0, NULL, NULL); + if (ret < 0) { + /* 'path' will be logged in calling function */ + gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", + strerror(-ret)); + *fop_errno = -ret; + ret = -1; + goto out; + } } - ret = size; -out: - return ret; + size_pending = (size - buf_len); + if (!size_pending) + break; + } + ret = size; +out: + return ret; } /* @@ -294,323 +279,318 @@ be converted to "0" in dht_migrate_file. */ int32_t -gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, int *fop_errno) +gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno) { - int32_t ret = -1; - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *linkto_subvol = NULL; - data_t *data = NULL; - struct iatt iatt = {0,}; - int32_t op_errno = 0; - dht_conf_t *conf = NULL; - gf_loglevel_t loglevel = 0; - dict_t *link_xattr = NULL; - dict_t *dict = NULL; - dict_t *xattr_rsp = NULL; - struct iatt stbuf = {0,}; - + int32_t ret = -1; + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *linkto_subvol = NULL; + data_t *data = NULL; + struct iatt iatt = { + 0, + }; + int32_t op_errno = 0; + dht_conf_t *conf = NULL; + gf_loglevel_t loglevel = 0; + dict_t *link_xattr = NULL; + dict_t *dict = NULL; + dict_t *xattr_rsp = NULL; + struct iatt stbuf = { + 0, + }; + + *fop_errno = EINVAL; + + GF_VALIDATE_OR_GOTO("defrag", loc, out); + GF_VALIDATE_OR_GOTO("defrag", loc->name, out); + GF_VALIDATE_OR_GOTO("defrag", this, out); + GF_VALIDATE_OR_GOTO("defrag", this->private, out); + + conf = this->private; + + if (gf_uuid_is_null(loc->pargfid)) { + gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "loc->pargfid is NULL for %s", + loc->path); *fop_errno = EINVAL; + ret = -1; + goto out; + } + + if (gf_uuid_is_null(loc->gfid)) { + gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "loc->gfid is NULL for %s", + loc->path); + *fop_errno = EINVAL; + ret = -1; + goto out; + } - GF_VALIDATE_OR_GOTO ("defrag", loc, out); - GF_VALIDATE_OR_GOTO ("defrag", loc->name, out); - GF_VALIDATE_OR_GOTO ("defrag", this, out); - GF_VALIDATE_OR_GOTO ("defrag", this->private, out); - - conf = this->private; - - if (gf_uuid_is_null (loc->pargfid)) { - gf_msg ("", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "loc->pargfid is NULL for %s", loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - if (gf_uuid_is_null (loc->gfid)) { - gf_msg ("", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "loc->gfid is NULL for %s", loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - link_xattr = dict_new (); - if (!link_xattr) { - ret = -1; - *fop_errno = ENOMEM; - goto out; - } - - /* - Parallel migration can lead to migration of the hard link multiple - times which 
can lead to data loss. Hence, adding a fresh lookup to - decide whether migration is required or not. - - Elaborating the scenario for let say 10 hardlinks [link{1..10}]: - Let say the first hard link "link1" does the setxattr of the - new hashed subvolume info on the cached file. As there are multiple - threads working, we might have already all the links created on the - new hashed by the time we reach hardlink let say link5. Now the - number of links on hashed is equal to that of cached. Hence, file - migration will happen for link6. - - Cached Hashed - --------T link6 rwxrwxrwx link6 - - Now post above state all the link file on the cached will be zero - byte linkto files. Hence, if we still do migration for the following - files link{7..10}, we will end up migrating 0 data leading to data - loss. - Hence, a lookup can make sure whether we need to migrate the - file or not. - */ - - dict = dict_new (); - if (!dict) { - ret = -1; - *fop_errno = ENOMEM; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "could not allocate memory for dict"); - goto out; - } - - ret = dict_set_int32 (dict, conf->link_xattr_name, 256); + link_xattr = dict_new(); + if (!link_xattr) { + ret = -1; + *fop_errno = ENOMEM; + goto out; + } + + /* + Parallel migration can lead to migration of the hard link multiple + times which can lead to data loss. Hence, adding a fresh lookup to + decide whether migration is required or not. + + Elaborating the scenario for let say 10 hardlinks [link{1..10}]: + Let say the first hard link "link1" does the setxattr of the + new hashed subvolume info on the cached file. As there are multiple + threads working, we might have already all the links created on the + new hashed by the time we reach hardlink let say link5. Now the + number of links on hashed is equal to that of cached. Hence, file + migration will happen for link6. + + Cached Hashed + --------T link6 rwxrwxrwx link6 + + Now post above state all the link file on the cached will be zero + byte linkto files. Hence, if we still do migration for the following + files link{7..10}, we will end up migrating 0 data leading to data + loss. + Hence, a lookup can make sure whether we need to migrate the + file or not. 
+ */ + + dict = dict_new(); + if (!dict) { + ret = -1; + *fop_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "could not allocate memory for dict"); + goto out; + } + + ret = dict_set_int32(dict, conf->link_xattr_name, 256); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to set 'linkto' key in dict", + loc->path); + goto out; + } + + ret = syncop_lookup(this, loc, &stbuf, NULL, dict, &xattr_rsp); + if (ret) { + /*Ignore ENOENT and ESTALE as file might have been + migrated already*/ + if (-ret == ENOENT || -ret == ESTALE) { + ret = -2; + goto out; + } + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:%s lookup failed with ret = %d", loc->path, + ret); + *fop_errno = -ret; + ret = -1; + goto out; + } + + cached_subvol = dht_subvol_get_cached(this, loc->inode); + if (!cached_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Failed to get cached subvol" + " for %s on %s", + loc->name, this->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (!hashed_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Failed to get hashed subvol" + " for %s on %s", + loc->name, this->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + /* Hardlink migration happens only with remove-brick. So this condition will + * be true only when the migration has happened. In case hardlinks are + * migrated for rebalance case, remove this check. Having this check here + * avoid redundant calls below*/ + if (hashed_subvol == cached_subvol) { + ret = -2; + goto out; + } + + gf_log(this->name, GF_LOG_INFO, + "Attempting to migrate hardlink %s " + "with gfid %s from %s -> %s", + loc->name, uuid_utoa(loc->gfid), cached_subvol->name, + hashed_subvol->name); + + data = dict_get(xattr_rsp, conf->link_xattr_name); + /* set linkto on cached -> hashed if not present, else link it */ + if (!data) { + ret = dict_set_str(link_xattr, conf->link_xattr_name, + hashed_subvol->name); if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to set 'linkto' key in dict", loc->path); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Failed to set dictionary value:" + " key = %s for %s", + conf->link_xattr_name, loc->name); + *fop_errno = ENOMEM; + ret = -1; + goto out; } - ret = syncop_lookup (this, loc, &stbuf, NULL, dict, &xattr_rsp); + ret = syncop_setxattr(cached_subvol, loc, link_xattr, 0, NULL, NULL); if (ret) { - /*Ignore ENOENT and ESTALE as file might have been - migrated already*/ - if (-ret == ENOENT || -ret == ESTALE) { - ret = -2; - goto out; - } - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:%s lookup failed with ret = %d", - loc->path, ret); - *fop_errno = -ret; - ret = -1; - goto out; - } - - cached_subvol = dht_subvol_get_cached (this, loc->inode); - if (!cached_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Failed to get cached subvol" - " for %s on %s", loc->name, this->name); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (!hashed_subvol) { 
- gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Failed to get hashed subvol" - " for %s on %s", loc->name, this->name); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - /* Hardlink migration happens only with remove-brick. So this condition will - * be true only when the migration has happened. In case hardlinks are migrated - * for rebalance case, remove this check. Having this check here avoid redundant - * calls below*/ - if (hashed_subvol == cached_subvol) { - ret = -2; - goto out; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Linkto setxattr failed %s -> %s", + cached_subvol->name, loc->name); + *fop_errno = -ret; + ret = -1; + goto out; } - gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s " - "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid), - cached_subvol->name, hashed_subvol->name); - - data = dict_get (xattr_rsp, conf->link_xattr_name); - /* set linkto on cached -> hashed if not present, else link it */ - if (!data) { - ret = dict_set_str (link_xattr, conf->link_xattr_name, - hashed_subvol->name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Failed to set dictionary value:" - " key = %s for %s", - conf->link_xattr_name, loc->name); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (cached_subvol, loc, link_xattr, 0, NULL, - NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Linkto setxattr failed %s -> %s", - cached_subvol->name, - loc->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - - gf_msg_debug (this->name, 0, "hardlink target subvol created on %s " - ",cached %s, file %s", - hashed_subvol->name, cached_subvol->name, loc->path); + gf_msg_debug(this->name, 0, + "hardlink target subvol created on %s " + ",cached %s, file %s", + hashed_subvol->name, cached_subvol->name, loc->path); - ret = -2; - goto out; + ret = -2; + goto out; + } else { + linkto_subvol = dht_linkfile_subvol(this, NULL, NULL, xattr_rsp); + if (!linkto_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR, + "Failed to get " + "linkto subvol for %s", + loc->name); } else { - linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattr_rsp); - if (!linkto_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SUBVOL_ERROR, - "Failed to get " - "linkto subvol for %s", loc->name); - } else { - hashed_subvol = linkto_subvol; - } - - ret = syncop_link (hashed_subvol, loc, loc, &iatt, NULL, NULL); - if (ret) { - op_errno = -ret; - ret = -1; - - loglevel = (op_errno == EEXIST) ? 
GF_LOG_DEBUG : \ - GF_LOG_ERROR; - gf_msg (this->name, loglevel, op_errno, - DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED, - "link of %s -> %s" - " failed on subvol %s", loc->name, - uuid_utoa(loc->gfid), - hashed_subvol->name); - if (op_errno != EEXIST) { - *fop_errno = op_errno; - goto out; - } - } else { - gf_msg_debug (this->name, 0, "syncop_link successful for" - " hardlink %s on subvol %s, cached %s", loc->path, - hashed_subvol->name, cached_subvol->name); - - } + hashed_subvol = linkto_subvol; } - ret = syncop_lookup (hashed_subvol, loc, &iatt, NULL, NULL, NULL); + ret = syncop_link(hashed_subvol, loc, loc, &iatt, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :Failed lookup %s on %s ", - loc->name, hashed_subvol->name); - - *fop_errno = -ret; - ret = -1; - goto out; + op_errno = -ret; + ret = -1; + + loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_ERROR; + gf_msg(this->name, loglevel, op_errno, + DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED, + "link of %s -> %s" + " failed on subvol %s", + loc->name, uuid_utoa(loc->gfid), hashed_subvol->name); + if (op_errno != EEXIST) { + *fop_errno = op_errno; + goto out; + } + } else { + gf_msg_debug(this->name, 0, + "syncop_link successful for" + " hardlink %s on subvol %s, cached %s", + loc->path, hashed_subvol->name, cached_subvol->name); } + } - /* There is a race where on the target subvol for the hardlink - * (note: hash subvol for the hardlink might differ from this), some - * other client(non-rebalance) would have created a linkto file for that - * hardlink as part of lookup. So let say there are 10 hardlinks, on the - * 5th hardlink it self the hardlinks might have migrated. Now for - * (6..10th) hardlinks the cached and target would be same as the file - * has already migrated. Hence this check is needed */ - if (cached_subvol == hashed_subvol) { - gf_msg_debug (this->name, 0, "source %s and destination %s " - "for hardlink %s are same", cached_subvol->name, - hashed_subvol->name, loc->path); - ret = -2; - goto out; - } + ret = syncop_lookup(hashed_subvol, loc, &iatt, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :Failed lookup %s on %s ", loc->name, + hashed_subvol->name); - if (iatt.ia_nlink == stbuf.ia_nlink) { - ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol, - GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS, - fop_errno); - if (ret) { - goto out; - } - } + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* There is a race where on the target subvol for the hardlink + * (note: hash subvol for the hardlink might differ from this), some + * other client(non-rebalance) would have created a linkto file for that + * hardlink as part of lookup. So let say there are 10 hardlinks, on the + * 5th hardlink it self the hardlinks might have migrated. Now for + * (6..10th) hardlinks the cached and target would be same as the file + * has already migrated. 
Hence this check is needed */ + if (cached_subvol == hashed_subvol) { + gf_msg_debug(this->name, 0, + "source %s and destination %s " + "for hardlink %s are same", + cached_subvol->name, hashed_subvol->name, loc->path); ret = -2; + goto out; + } + + if (iatt.ia_nlink == stbuf.ia_nlink) { + ret = dht_migrate_file(this, loc, cached_subvol, hashed_subvol, + GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS, fop_errno); + if (ret) { + goto out; + } + } + ret = -2; out: - if (link_xattr) - dict_unref (link_xattr); + if (link_xattr) + dict_unref(link_xattr); - if (xattr_rsp) - dict_unref (xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } - - static int -__check_file_has_hardlink (xlator_t *this, loc_t *loc, - struct iatt *stbuf, dict_t *xattrs, int flags, - gf_defrag_info_t *defrag, dht_conf_t *conf, int *fop_errno) +__check_file_has_hardlink(xlator_t *this, loc_t *loc, struct iatt *stbuf, + dict_t *xattrs, int flags, gf_defrag_info_t *defrag, + dht_conf_t *conf, int *fop_errno) { - int ret = 0; + int ret = 0; - if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) { - ret = 0; - return ret; - } - if (stbuf->ia_nlink > 1) { - /* support for decomission */ - if (flags == GF_DHT_MIGRATE_HARDLINK) { - synclock_lock (&conf->link_lock); - ret = gf_defrag_handle_hardlink - (this, loc, fop_errno); - synclock_unlock (&conf->link_lock); - /* - Returning zero will force the file to be remigrated. - Checkout gf_defrag_handle_hardlink for more information. - */ - if (ret && ret != -2) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to migrate file with link", - loc->path); - } - } else { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migration skipped for:" - "%s: file has hardlinks", loc->path); - *fop_errno = ENOTSUP; - ret = 1; - } - } + if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) { + ret = 0; + return ret; + } + if (stbuf->ia_nlink > 1) { + /* support for decomission */ + if (flags == GF_DHT_MIGRATE_HARDLINK) { + synclock_lock(&conf->link_lock); + ret = gf_defrag_handle_hardlink(this, loc, fop_errno); + synclock_unlock(&conf->link_lock); + /* + Returning zero will force the file to be remigrated. + Checkout gf_defrag_handle_hardlink for more information. 
+ */ + if (ret && ret != -2) { + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to migrate file with link", + loc->path); + } + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migration skipped for:" + "%s: file has hardlinks", + loc->path); + *fop_errno = ENOTSUP; + ret = 1; + } + } - return ret; + return ret; } - /* return values 0 : File will be migrated @@ -620,4818 +600,4660 @@ __check_file_has_hardlink (xlator_t *this, loc_t *loc, -1 : failure */ static int -__is_file_migratable (xlator_t *this, loc_t *loc, - struct iatt *stbuf, dict_t *xattrs, int flags, - gf_defrag_info_t *defrag, dht_conf_t *conf, - int *fop_errno) +__is_file_migratable(xlator_t *this, loc_t *loc, struct iatt *stbuf, + dict_t *xattrs, int flags, gf_defrag_info_t *defrag, + dht_conf_t *conf, int *fop_errno) { - int ret = -1; - int lock_count = 0; - - if (IA_ISDIR (stbuf->ia_type)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: migrate-file called on directory", loc->path); - *fop_errno = EISDIR; - ret = -1; - goto out; - } - - if (!conf->lock_migration_enabled) { - ret = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT, - &lock_count); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: Unable to get lock count for file", - loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - if (lock_count) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s: File has locks." - " Skipping file migration", loc->path); - *fop_errno = ENOTSUP; - ret = 1; - goto out; - } - } + int ret = -1; + int lock_count = 0; + + if (IA_ISDIR(stbuf->ia_type)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: migrate-file called on directory", + loc->path); + *fop_errno = EISDIR; + ret = -1; + goto out; + } - /* Check if file has hardlink*/ - ret = __check_file_has_hardlink (this, loc, stbuf, xattrs, - flags, defrag, conf, fop_errno); + if (!conf->lock_migration_enabled) { + ret = dict_get_int32(xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: Unable to get lock count for file", + loc->path); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + if (lock_count) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: File has locks." 
+ " Skipping file migration", + loc->path); + *fop_errno = ENOTSUP; + ret = 1; + goto out; + } + } + + /* Check if file has hardlink*/ + ret = __check_file_has_hardlink(this, loc, stbuf, xattrs, flags, defrag, + conf, fop_errno); out: - return ret; + return ret; } - static int -__dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, - loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, - int *fop_errno) +__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, + loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, + int *fop_errno) { - int ret = -1; - int ret2 = -1; - fd_t *fd = NULL; - struct iatt new_stbuf = {0,}; - struct iatt check_stbuf= {0,}; - dht_conf_t *conf = NULL; - dict_t *dict = NULL; - dict_t *xdata = NULL; - - conf = this->private; + int ret = -1; + int ret2 = -1; + fd_t *fd = NULL; + struct iatt new_stbuf = { + 0, + }; + struct iatt check_stbuf = { + 0, + }; + dht_conf_t *conf = NULL; + dict_t *dict = NULL; + dict_t *xdata = NULL; + + conf = this->private; + + dict = dict_new(); + if (!dict) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "dictionary allocation failed for" + "path:%s", + loc->path); + goto out; + } + ret = dict_set_gfuuid(dict, "gfid-req", stbuf->ia_gfid, true); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = gfid-req", loc->path); + goto out; + } + + ret = dict_set_str(dict, conf->link_xattr_name, from->name); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + conf->link_xattr_name); + goto out; + } + + fd = fd_create(loc->inode, DHT_REBALANCE_PID); + if (!fd) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: fd create failed (destination)", loc->path); + goto out; + } - dict = dict_new (); - if (!dict) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "dictionary allocation failed for" - "path:%s", loc->path); - goto out; - } - ret = dict_set_gfuuid (dict, "gfid-req", stbuf->ia_gfid, true); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = gfid-req", - loc->path); - goto out; + if (!!dht_is_tier_xlator(this)) { + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)", + loc->path); + goto out; } - ret = dict_set_str (dict, conf->link_xattr_name, from->name); + ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1); if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", - loc->path, conf->link_xattr_name); - goto out; - } - - fd = fd_create (loc->inode, DHT_REBALANCE_PID); - if (!fd) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: fd create failed (destination)", - loc->path); - goto out; - } - - if (!!dht_is_tier_xlator (this)) { - xdata = dict_new (); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: dict_new failed)", - loc->path); - goto out; - } 
+ *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + GF_CLEAN_WRITE_PROTECTION); + goto out; + } + } + + ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL); + if (!ret) { + /* File exits in the destination, check if gfid matches */ + if (gf_uuid_compare(stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, + "file %s exists in %s with different gfid", loc->path, + to->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + } + if ((ret < 0) && (-ret != ENOENT)) { + /* File exists in destination, but not accessible */ + gf_msg(THIS->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to lookup file", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } - ret = dict_set_int32 (xdata, GF_CLEAN_WRITE_PROTECTION, 1); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", - loc->path, GF_CLEAN_WRITE_PROTECTION); - goto out; + /* Create the destination with LINKFILE mode, and linkto xattr, + if the linkfile already exists, just open the file */ + if (!ret) { + /* + * File already present, just open the file. + */ + ret = syncop_open(to, loc, O_RDWR, fd, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to open %s on %s", loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + } else { + ret = syncop_create(to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, &new_stbuf, + dict, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to create %s on %s", loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + } + + fd_bind(fd); + + /*Reason of doing lookup after create again: + *In the create, there is some time-gap between opening fd at the + *server (posix_layer) and binding it in server (incrementing fd count), + *so if in that time-gap, if other process sends unlink considering it + *as a linkto file, because inode->fd count will be 0, so file will be + *unlinked at the backend. And because further operations are performed + *on fd, so though migration will be done but will end with no file + *at the backend. 
+ */ + + ret = syncop_lookup(to, loc, &check_stbuf, NULL, NULL, NULL); + if (!ret) { + if (gf_uuid_compare(stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, + "file %s exists in %s with different gfid," + "found in lookup after create", + loc->path, to->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + } + + if (-ret == ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: file does not exist" + "on %s", + loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + ret = syncop_fsetattr(to, fd, stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), + NULL, NULL, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "chown failed for %s on %s", loc->path, to->name); + } + + /* No need to bother about 0 byte size files */ + if (stbuf->ia_size > 0) { + if (conf->use_fallocate) { + ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL); + if (ret < 0) { + if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) { + conf->use_fallocate = _gf_false; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "fallocate failed for %s on %s", loc->path, + to->name); + + *fop_errno = -ret; + + /* fallocate does not release the space + * in some cases + */ + ret2 = syncop_ftruncate(to, fd, 0, NULL, NULL, NULL, NULL); + if (ret2 < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret2, + DHT_MSG_MIGRATE_FILE_FAILED, + "ftruncate failed for " + "%s on %s", + loc->path, to->name); + } + goto out; } + } } - ret = syncop_lookup (to, loc, &new_stbuf, NULL, xdata, NULL); - if (!ret) { - /* File exits in the destination, check if gfid matches */ - if (gf_uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_GFID_MISMATCH, - "file %s exists in %s with different gfid", - loc->path, to->name); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - } - if ((ret < 0) && (-ret != ENOENT)) { - /* File exists in destination, but not accessible */ - gf_msg (THIS->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to lookup file", - loc->path); + if (!conf->use_fallocate) { + ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL, + NULL); + if (ret < 0) { *fop_errno = -ret; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "ftruncate failed for %s on %s", loc->path, to->name); + } } + } - /* Create the destination with LINKFILE mode, and linkto xattr, - if the linkfile already exists, just open the file */ - if (!ret) { - /* - * File already present, just open the file. 
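For non-empty files the code above reserves the full size on the destination with fallocate, falls back to ftruncate when the backend reports EOPNOTSUPP, EINVAL or ENOSYS, and truncates back to zero on any other failure so no space stays allocated. A local sketch of that fallback, assuming Linux fallocate(2); the path and size in main are placeholders:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>

    /* Reserve 'size' bytes on fd, preferring a real allocation and falling
     * back to a sparse ftruncate when fallocate is not supported.
     * Returns 0 on success, -errno on failure. */
    static int reserve_space(int fd, off_t size)
    {
        if (size == 0)
            return 0;
        if (fallocate(fd, 0, 0, size) == 0)
            return 0;
        if (errno == EOPNOTSUPP || errno == EINVAL || errno == ENOSYS)
            return ftruncate(fd, size) == 0 ? 0 : -errno;

        int err = errno;
        /* fallocate can leave space behind on some backends; release it. */
        (void)ftruncate(fd, 0);
        return -err;
    }

    int main(void)
    {
        int fd = open("/tmp/demo-dst", O_RDWR | O_CREAT, 0600);
        int ret = fd >= 0 ? reserve_space(fd, 1 << 20) : -errno;

        if (fd >= 0)
            close(fd);
        return ret == 0 ? 0 : 1;
    }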
- */ - ret = syncop_open (to, loc, O_RDWR, fd, NULL, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to open %s on %s", - loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - } else { - ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, - &new_stbuf, dict, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to create %s on %s", - loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto out; - } + /* success */ + ret = 0; - } + if (dst_fd) + *dst_fd = fd; - fd_bind (fd); +out: + if (ret) { + if (fd) { + fd_unref(fd); + } + } + if (dict) + dict_unref(dict); - /*Reason of doing lookup after create again: - *In the create, there is some time-gap between opening fd at the - *server (posix_layer) and binding it in server (incrementing fd count), - *so if in that time-gap, if other process sends unlink considering it - *as a linkto file, because inode->fd count will be 0, so file will be - *unlinked at the backend. And because further operations are performed - *on fd, so though migration will be done but will end with no file - *at the backend. - */ + if (xdata) + dict_unref(dict); - ret = syncop_lookup (to, loc, &check_stbuf, NULL, NULL, NULL); - if (!ret) { + return ret; +} - if (gf_uuid_compare (stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_GFID_MISMATCH, - "file %s exists in %s with different gfid," - "found in lookup after create", - loc->path, to->name); - *fop_errno = EINVAL; - ret = -1; - goto out; - } +static int +__dht_check_free_space(xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc, + struct iatt *stbuf, int flag, dht_conf_t *conf, + gf_boolean_t *target_changed, xlator_t **new_subvol, + int *fop_errno) +{ + struct statvfs src_statfs = { + 0, + }; + struct statvfs dst_statfs = { + 0, + }; + int ret = -1; + dict_t *xdata = NULL; + dht_layout_t *layout = NULL; + uint64_t src_statfs_blocks = 1; + uint64_t dst_statfs_blocks = 1; + double dst_post_availspacepercent = 0; + double src_post_availspacepercent = 0; + uint64_t file_blocks = 0; + uint64_t src_total_blocks = 0; + uint64_t dst_total_blocks = 0; + + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "failed to allocate dictionary"); + goto out; + } + + ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); + ret = -1; + *fop_errno = ENOMEM; + goto out; + } + + ret = syncop_statfs(from, loc, &src_statfs, xdata, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to get statfs of %s on %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + ret = syncop_statfs(to, loc, &dst_statfs, xdata, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to get statfs of %s on %s", loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + gf_msg_debug(this->name, 0, + "min_free_disk - %f , block available - " + "%lu , block size - %lu ", + conf->min_free_disk, dst_statfs.f_bavail, dst_statfs.f_bsize); + + dst_statfs_blocks = dst_statfs.f_bavail * + (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + src_statfs_blocks = src_statfs.f_bavail * + (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + dst_total_blocks = 
dst_statfs.f_blocks * + (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + src_total_blocks = src_statfs.f_blocks * + (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + /* if force option is given, do not check for space @ dst. + * Check only if space is avail for the file */ + if (flag != GF_DHT_MIGRATE_DATA) + goto check_avail_space; + + /* Check: + During rebalance `migrate-data` - Destination subvol experiences + a `reduction` in 'blocks' of free space, at the same time source + subvol gains certain 'blocks' of free space. A valid check is + necessary here to avoid erroneous move to destination where + the space could be scantily available. + With heterogeneous brick support, an actual space comparison could + prevent any files being migrated to newly added bricks if they are + smaller then the free space available on the existing bricks. + */ + if (stbuf) { + if (!conf->use_fallocate) { + file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1; + file_blocks /= GF_DISK_SECTOR_SIZE; + + if (file_blocks >= dst_statfs_blocks) { + dst_statfs_blocks = 0; + } else { + dst_statfs_blocks -= file_blocks; + } + } + + src_post_availspacepercent = ((src_statfs_blocks + file_blocks) * 100) / + src_total_blocks; + + dst_post_availspacepercent = (dst_statfs_blocks * 100) / + dst_total_blocks; + + if (dst_post_availspacepercent < src_post_availspacepercent) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "data movement of file " + "{blocks:%" PRIu64 + " name:(%s)} would result in " + "dst node (%s:%" PRIu64 + ") having lower disk " + "space than the source node (%s:%" PRIu64 + ")" + ".Skipping file.", + stbuf->ia_blocks, loc->path, to->name, dst_statfs_blocks, + from->name, src_statfs_blocks); + + /* this is not a 'failure', but we don't want to + consider this as 'success' too :-/ */ + *fop_errno = ENOSPC; + ret = 1; + goto out; + } + } +check_avail_space: + if (conf->disk_unit == 'p' && dst_statfs.f_blocks) { + dst_post_availspacepercent = (dst_statfs_blocks * 100) / + dst_total_blocks; + + gf_msg_debug(this->name, 0, + "file : %s, post_availspacepercent" + " : %lf f_bavail : %lu min-free-disk: %lf", + loc->path, dst_post_availspacepercent, dst_statfs.f_bavail, + conf->min_free_disk); + + if (dst_post_availspacepercent < conf->min_free_disk) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "Write will cross min-free-disk for " + "file - %s on subvol - %s. Looking " + "for new subvol", + loc->path, to->name); + + goto find_new_subvol; + } else { + ret = 0; + goto out; } + } - if (-ret == ENOENT) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, "%s: file does not exist" - "on %s", loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto out; - } + if (conf->disk_unit != 'p') { + if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) < conf->min_free_disk) { + gf_msg_debug(this->name, 0, + "file : %s, destination " + "frsize: %lu f_bavail : %lu " + "min-free-disk: %lf", + loc->path, dst_statfs.f_frsize, dst_statfs.f_bavail, + conf->min_free_disk); - ret = syncop_fsetattr (to, fd, stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID), - NULL, NULL, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "chown failed for %s on %s", - loc->path, to->name); - } + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "write will" + " cross min-free-disk for file - %s on subvol -" + " %s. 
looking for new subvol", + loc->path, to->name); - /* No need to bother about 0 byte size files */ - if (stbuf->ia_size > 0) { - if (conf->use_fallocate) { - ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size, - NULL, NULL); - if (ret < 0) { - if (ret == -EOPNOTSUPP || ret == -EINVAL || - ret == -ENOSYS) { - conf->use_fallocate = _gf_false; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "fallocate failed for %s on %s", - loc->path, to->name); - - *fop_errno = -ret; - - /* fallocate does not release the space - * in some cases - */ - ret2 = syncop_ftruncate (to, fd, 0, - NULL, NULL, - NULL, NULL); - if (ret2 < 0) { - gf_msg (this->name, - GF_LOG_WARNING, -ret2, - DHT_MSG_MIGRATE_FILE_FAILED, - "ftruncate failed for " - "%s on %s", - loc->path, to->name); - } - goto out; - } - } - } + goto find_new_subvol; - if (!conf->use_fallocate) { - ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, - NULL, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "ftruncate failed for %s on %s", - loc->path, to->name); - } - } + } else { + ret = 0; + goto out; } + } - /* success */ +find_new_subvol: + layout = dht_layout_get(this, loc->parent); + if (!layout) { + gf_log(this->name, GF_LOG_ERROR, "Layout is NULL"); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + *new_subvol = dht_subvol_with_free_space_inodes(this, to, from, layout, + stbuf->ia_size); + if ((!(*new_subvol)) || (*new_subvol == from)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE, + "Could not find any subvol" + " with space accommodating the file - %s. Consider " + "adding bricks", + loc->path); + + *target_changed = _gf_false; + *fop_errno = ENOSPC; + ret = -1; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "new target found - %s" + " for file - %s", + (*new_subvol)->name, loc->path); + *target_changed = _gf_true; ret = 0; - - if (dst_fd) - *dst_fd = fd; + } out: - if (ret) { - if (fd) { - fd_unref (fd); - } - } - if (dict) - dict_unref (dict); - - if (xdata) - dict_unref (dict); - - - return ret; + if (xdata) + dict_unref(xdata); + return ret; } static int -__dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, - loc_t *loc, struct iatt *stbuf, int flag, - dht_conf_t *conf, gf_boolean_t *target_changed, - xlator_t **new_subvol, int *fop_errno) +__dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, + uint64_t ia_size, int hole_exists, int *fop_errno) { - struct statvfs src_statfs = {0,}; - struct statvfs dst_statfs = {0,}; - int ret = -1; - dict_t *xdata = NULL; - dht_layout_t *layout = NULL; - uint64_t src_statfs_blocks = 1; - uint64_t dst_statfs_blocks = 1; - double dst_post_availspacepercent = 0; - double src_post_availspacepercent = 0; - uint64_t file_blocks = 0; - uint64_t src_total_blocks = 0; - uint64_t dst_total_blocks = 0; - - xdata = dict_new (); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - "failed to allocate dictionary"); - goto out; - } + int ret = 0; + int count = 0; + off_t offset = 0; + struct iovec *vector = NULL; + struct iobref *iobref = NULL; + uint64_t total = 0; + size_t read_size = 0; + dict_t *xdata = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; + /* if file size is '0', no need to enter this loop */ + while (total < ia_size) { + read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) + ? 
DHT_REBALANCE_BLKSIZE + : (ia_size - total)); + + ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count, + &iobref, NULL, NULL, NULL); + if (!ret || (ret < 0)) { + *fop_errno = -ret; + break; + } + + if (hole_exists) { + ret = dht_write_with_holes(to, dst, vector, count, ret, offset, + iobref, fop_errno); + } else { + if (!conf->force_migration && !dht_is_tier_xlator(this)) { + xdata = dict_new(); + if (!xdata) { + gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "insufficient memory"); + ret = -1; + *fop_errno = ENOMEM; + break; + } - ret = dict_set_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set " - GF_INTERNAL_IGNORE_DEEM_STATFS" in dict"); - ret = -1; - *fop_errno = ENOMEM; - goto out; - } + /* Fail this write and abort rebalance if we + * detect a write from client since migration of + * this file started. This is done to avoid + * potential data corruption due to out of order + * writes from rebalance and client to the same + * region (as compared between src and dst + * files). See + * https://github.com/gluster/glusterfs/issues/308 + * for more details. + */ + ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, + "failed to set dict"); + ret = -1; + *fop_errno = ENOMEM; + break; + } + } - ret = syncop_statfs (from, loc, &src_statfs, xdata, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to get statfs of %s on %s", - loc->path, from->name); + ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, + NULL, xdata, NULL); + if (ret < 0) { *fop_errno = -ret; - ret = -1; - goto out; + } } - ret = syncop_statfs (to, loc, &dst_statfs, xdata, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to get statfs of %s on %s", - loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto out; + if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && + (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { + gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, + "Migrate file paused"); + ret = -1; } - gf_msg_debug (this->name, 0, "min_free_disk - %f , block available - " - "%lu , block size - %lu ", conf->min_free_disk, - dst_statfs.f_bavail, dst_statfs.f_bsize); - - dst_statfs_blocks = dst_statfs.f_bavail * - (dst_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - src_statfs_blocks = src_statfs.f_bavail * - (src_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - dst_total_blocks = dst_statfs.f_blocks * - (dst_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - src_total_blocks = src_statfs.f_blocks * - (src_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - /* if force option is given, do not check for space @ dst. - * Check only if space is avail for the file */ - if (flag != GF_DHT_MIGRATE_DATA) - goto check_avail_space; - - /* Check: - During rebalance `migrate-data` - Destination subvol experiences - a `reduction` in 'blocks' of free space, at the same time source - subvol gains certain 'blocks' of free space. A valid check is - necessary here to avoid erroneous move to destination where - the space could be scantily available. - With heterogeneous brick support, an actual space comparison could - prevent any files being migrated to newly added bricks if they are - smaller then the free space available on the existing bricks. 
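Both the reformatted __dht_check_free_space above and the old body being deleted in this hunk do the same arithmetic: statvfs results are normalised to 512-byte sectors (GF_DISK_SECTOR_SIZE), the file's own blocks are charged against the destination, and the post-migration free-space percentages of source and destination are compared, with an additional min-free-disk floor in percentage mode. A standalone statvfs(3) sketch of that comparison; the 10 percent floor and the paths in main are example values, not volume defaults:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/statvfs.h>

    #define SECTOR 512 /* same role as GF_DISK_SECTOR_SIZE */

    /* Return 1 if moving a file of 'size' bytes from the filesystem under
     * src_path to the one under dst_path would leave the destination with
     * a lower free-space percentage than the source, or below min_free_pct;
     * 0 if the move looks safe; -1 on error. */
    static int move_would_starve_dst(const char *src_path, const char *dst_path,
                                     uint64_t size, double min_free_pct)
    {
        struct statvfs s, d;

        if (statvfs(src_path, &s) != 0 || statvfs(dst_path, &d) != 0)
            return -1;

        uint64_t src_avail = s.f_bavail * (s.f_frsize / SECTOR);
        uint64_t dst_avail = d.f_bavail * (d.f_frsize / SECTOR);
        uint64_t src_total = s.f_blocks * (s.f_frsize / SECTOR);
        uint64_t dst_total = d.f_blocks * (d.f_frsize / SECTOR);

        if (!src_total || !dst_total)
            return -1;

        /* Charge the file's own blocks against the destination. */
        uint64_t file_blocks = (size + SECTOR - 1) / SECTOR;
        dst_avail = file_blocks >= dst_avail ? 0 : dst_avail - file_blocks;

        double src_after = (double)(src_avail + file_blocks) * 100 / src_total;
        double dst_after = (double)dst_avail * 100 / dst_total;

        return dst_after < src_after || dst_after < min_free_pct;
    }

    int main(void)
    {
        int skip = move_would_starve_dst("/", "/tmp", 64 << 20, 10.0);
        printf("skip migration: %d\n", skip);
        return skip < 0;
    }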
- */ - if (stbuf) { - if (!conf->use_fallocate) { - file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1; - file_blocks /= GF_DISK_SECTOR_SIZE; - - if (file_blocks >= dst_statfs_blocks) { - dst_statfs_blocks = 0; - } else { - dst_statfs_blocks -= file_blocks; - } - } - - src_post_availspacepercent = - ((src_statfs_blocks + file_blocks) * 100) / src_total_blocks; - - dst_post_availspacepercent = - (dst_statfs_blocks * 100) / dst_total_blocks; - - if (dst_post_availspacepercent < src_post_availspacepercent) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "data movement of file " - "{blocks:%"PRIu64" name:(%s)} would result in " - "dst node (%s:%"PRIu64") having lower disk " - "space than the source node (%s:%"PRIu64")" - ".Skipping file.", stbuf->ia_blocks, loc->path, - to->name, dst_statfs_blocks, from->name, - src_statfs_blocks); - - /* this is not a 'failure', but we don't want to - consider this as 'success' too :-/ */ - *fop_errno = ENOSPC; - ret = 1; - goto out; - } + if (ret < 0) { + break; } -check_avail_space: - if (conf->disk_unit == 'p' && dst_statfs.f_blocks) { - dst_post_availspacepercent = - (dst_statfs_blocks * 100) / dst_total_blocks; - - gf_msg_debug (this->name, 0, "file : %s, post_availspacepercent" - " : %lf f_bavail : %lu min-free-disk: %lf", - loc->path, dst_post_availspacepercent, - dst_statfs.f_bavail, conf->min_free_disk); - - if (dst_post_availspacepercent < conf->min_free_disk) { - gf_msg (this->name, GF_LOG_WARNING, 0, 0, - "Write will cross min-free-disk for " - "file - %s on subvol - %s. Looking " - "for new subvol", loc->path, to->name); - - goto find_new_subvol; - } else { - ret = 0; - goto out; - } - } + offset += ret; + total += ret; - if (conf->disk_unit != 'p') { - if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) < - conf->min_free_disk) { - gf_msg_debug (this->name, 0, "file : %s, destination " - "frsize: %lu f_bavail : %lu " - "min-free-disk: %lf", loc->path, - dst_statfs.f_frsize, dst_statfs.f_bavail, - conf->min_free_disk); + GF_FREE(vector); + if (iobref) + iobref_unref(iobref); + iobref = NULL; + vector = NULL; + } + if (iobref) + iobref_unref(iobref); + GF_FREE(vector); + + if (ret >= 0) + ret = 0; + else + ret = -1; - gf_msg (this->name, GF_LOG_WARNING, 0, 0, "write will" - " cross min-free-disk for file - %s on subvol -" - " %s. 
looking for new subvol", loc->path, - to->name); + if (xdata) { + dict_unref(xdata); + } - goto find_new_subvol; + return ret; +} - } else { - ret = 0; - goto out; - } - } +static int +__dht_rebalance_open_src_file(xlator_t *this, xlator_t *from, xlator_t *to, + loc_t *loc, struct iatt *stbuf, fd_t **src_fd, + gf_boolean_t *clean_src, int *fop_errno) +{ + int ret = 0; + fd_t *fd = NULL; + dict_t *dict = NULL; + struct iatt iatt = { + 0, + }; + dht_conf_t *conf = NULL; + + conf = this->private; + + *clean_src = _gf_false; + + fd = fd_create(loc->inode, DHT_REBALANCE_PID); + if (!fd) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: fd create failed (source)", loc->path); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + ret = syncop_open(from, loc, O_RDWR, fd, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to open file %s on %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } -find_new_subvol: - layout = dht_layout_get (this, loc->parent); - if (!layout) { - gf_log (this->name, GF_LOG_ERROR, "Layout is NULL"); - *fop_errno = EINVAL; - ret = -1; - goto out; - } + fd_bind(fd); - *new_subvol = dht_subvol_with_free_space_inodes (this, to, from, layout, - stbuf->ia_size); - if ((!(*new_subvol)) || (*new_subvol == from)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_SUBVOL_INSUFF_SPACE, "Could not find any subvol" - " with space accommodating the file - %s. Consider " - "adding bricks", loc->path); + if (src_fd) + *src_fd = fd; - *target_changed = _gf_false; - *fop_errno = ENOSPC; - ret = -1; - } else { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "new target found - %s" - " for file - %s", (*new_subvol)->name, loc->path); - *target_changed = _gf_true; - ret = 0; - } + ret = -1; + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: Could not allocate memory for dict", loc->path); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + ret = dict_set_str(dict, conf->link_xattr_name, to->name); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set xattr in dict for %s (linkto:%s)", loc->path, + to->name); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + /* Once the migration starts, the source should have 'linkto' key set + to show which is the target, so other clients can work around it */ + ret = syncop_setxattr(from, loc, dict, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to set xattr on %s in %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* Reset source mode/xattr if migration fails*/ + *clean_src = _gf_true; + + /* mode should be (+S+T) to indicate migration is in progress */ + iatt.ia_prot = stbuf->ia_prot; + iatt.ia_type = stbuf->ia_type; + iatt.ia_prot.sticky = 1; + iatt.ia_prot.sgid = 1; + + ret = syncop_setattr(from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL, NULL, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to set mode on %s in %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + /* success */ + ret = 0; out: - if (xdata) - dict_unref (xdata); - return ret; + if (dict) + dict_unref(dict); + + return ret; } -static int -__dht_rebalance_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, - xlator_t *from, xlator_t *to, fd_t *src, - fd_t *dst, uint64_t ia_size, int hole_exists, - int *fop_errno) +int 
+migrate_special_files(xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, + struct iatt *buf, int *fop_errno) { - int ret = 0; - int count = 0; - off_t offset = 0; - struct iovec *vector = NULL; - struct iobref *iobref = NULL; - uint64_t total = 0; - size_t read_size = 0; - dict_t *xdata = NULL; - dht_conf_t *conf = NULL; + int ret = -1; + dict_t *rsp_dict = NULL; + dict_t *dict = NULL; + char *link = NULL; + struct iatt stbuf = { + 0, + }; + dht_conf_t *conf = this->private; + + dict = dict_new(); + if (!dict) { + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + ret = dict_set_int32(dict, conf->link_xattr_name, 256); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_log(this->name, GF_LOG_ERROR, + "%s: failed to set 'linkto' key in dict", loc->path); + goto out; + } + + /* check in the destination if the file is link file */ + ret = syncop_lookup(to, loc, &stbuf, NULL, dict, &rsp_dict); + if ((ret < 0) && (-ret != ENOENT)) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: lookup failed", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* we no more require this key */ + dict_del(dict, conf->link_xattr_name); + + /* file exists in target node, only if it is 'linkfile' its valid, + otherwise, error out */ + if (!ret) { + if (!check_is_linkfile(loc->inode, &stbuf, rsp_dict, + conf->link_xattr_name)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: file exists in destination", loc->path); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + /* as file is linkfile, delete it */ + ret = syncop_unlink(to, loc, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to delete the linkfile", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } + } + + /* Set the gfid of the source file in dict */ + ret = dict_set_gfuuid(dict, "gfid-req", buf->ia_gfid, true); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_log(this->name, GF_LOG_ERROR, + "%s: failed to set gfid in dict for create", loc->path); + goto out; + } + + /* Create the file in target */ + if (IA_ISLNK(buf->ia_type)) { + /* Handle symlinks separately */ + ret = syncop_readlink(from, loc, &link, buf->ia_size, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: readlink on symlink failed", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } - conf = this->private; - /* if file size is '0', no need to enter this loop */ - while (total < ia_size) { - read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? 
- DHT_REBALANCE_BLKSIZE : (ia_size - total)); + ret = syncop_symlink(to, loc, link, 0, dict, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, "%s: creating symlink failed", + loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } + + goto done; + } + + ret = syncop_mknod(to, loc, st_mode_from_ia(buf->ia_prot, buf->ia_type), + makedev(ia_major(buf->ia_rdev), ia_minor(buf->ia_rdev)), + 0, dict, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: mknod failed", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } - ret = syncop_readv (from, src, read_size, - offset, 0, &vector, &count, &iobref, NULL, - NULL, NULL); - if (!ret || (ret < 0)) { - *fop_errno = -ret; - break; - } +done: + ret = syncop_setattr(to, loc, buf, + (GF_SET_ATTR_MTIME | GF_SET_ATTR_UID | + GF_SET_ATTR_GID | GF_SET_ATTR_MODE), + NULL, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to perform setattr on %s", loc->path, to->name); + *fop_errno = -ret; + } + + ret = syncop_unlink(from, loc, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: unlink failed", loc->path); + *fop_errno = -ret; + ret = -1; + } - if (hole_exists) { - ret = dht_write_with_holes (to, dst, vector, count, - ret, offset, iobref, - fop_errno); - } else { - if (!conf->force_migration && - !dht_is_tier_xlator (this)) { - xdata = dict_new (); - if (!xdata) { - gf_msg ("dht", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "insufficient memory"); - ret = -1; - *fop_errno = ENOMEM; - break; - } - - /* Fail this write and abort rebalance if we - * detect a write from client since migration of - * this file started. This is done to avoid - * potential data corruption due to out of order - * writes from rebalance and client to the same - * region (as compared between src and dst - * files). See - * https://github.com/gluster/glusterfs/issues/308 - * for more details. 
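The copy loop being reformatted in this region reads the source in DHT_REBALANCE_BLKSIZE chunks and writes each chunk to the destination at the same offset, and when force-migration is off it attaches GF_AVOID_OVERWRITE so that if a client has written to the file since migration began, the rebalance write fails rather than silently overwriting newer data. A plain pread/pwrite analogue of the chunked copy (the network-level overwrite guard has no local equivalent); the 128 KiB block size is a stand-in, not the real DHT_REBALANCE_BLKSIZE:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define BLKSIZE (128 * 1024) /* stand-in for DHT_REBALANCE_BLKSIZE */

    /* Copy 'size' bytes from src_fd to dst_fd in fixed-size chunks,
     * keeping the read and write offsets in lockstep. */
    static int copy_data(int src_fd, int dst_fd, uint64_t size)
    {
        char *buf = malloc(BLKSIZE);
        uint64_t total = 0;
        int ret = 0;

        if (!buf)
            return -1;

        while (total < size) {
            size_t want = size - total > BLKSIZE ? BLKSIZE : size - total;
            ssize_t got = pread(src_fd, buf, want, total);

            if (got <= 0) { /* error, or EOF before the expected size */
                ret = -1;
                break;
            }
            if (pwrite(dst_fd, buf, got, total) != got) {
                ret = -1;
                break;
            }
            total += got;
        }
        free(buf);
        return ret;
    }

    int main(int argc, char **argv)
    {
        if (argc != 4) {
            fprintf(stderr, "usage: %s <src> <dst> <bytes>\n", argv[0]);
            return 1;
        }
        int src = open(argv[1], O_RDONLY);
        int dst = open(argv[2], O_RDWR | O_CREAT | O_TRUNC, 0600);
        int ret = (src < 0 || dst < 0)
                      ? -1
                      : copy_data(src, dst, strtoull(argv[3], NULL, 10));

        if (src >= 0)
            close(src);
        if (dst >= 0)
            close(dst);
        return ret ? 1 : 0;
    }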
- */ - ret = dict_set_int32 (xdata, - GF_AVOID_OVERWRITE, 1); - if (ret) { - gf_msg ("dht", GF_LOG_ERROR, 0, - ENOMEM, "failed to set dict"); - ret = -1; - *fop_errno = ENOMEM; - break; - } +out: + GF_FREE(link); + if (dict) + dict_unref(dict); - } + if (rsp_dict) + dict_unref(rsp_dict); - ret = syncop_writev (to, dst, vector, count, - offset, iobref, 0, NULL, NULL, - xdata, NULL); - if (ret < 0) { - *fop_errno = -ret; - } - } + return ret; +} - if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && - (gf_defrag_get_pause_state (&defrag->tier_conf) != TIER_RUNNING)) { - gf_msg ("tier", GF_LOG_INFO, 0, - DHT_MSG_TIER_PAUSED, - "Migrate file paused"); - ret = -1; - } +static int +__dht_migration_cleanup_src_file(xlator_t *this, loc_t *loc, fd_t *fd, + xlator_t *from, ia_prot_t *src_ia_prot) +{ + int ret = -1; + dht_conf_t *conf = NULL; + struct iatt new_stbuf = { + 0, + }; + + if (!this || !fd || !from || !src_ia_prot) { + goto out; + } + + conf = this->private; + + /*Revert source mode and xattr changes*/ + ret = syncop_fstat(from, fd, &new_stbuf, NULL, NULL); + if (ret < 0) { + /* Failed to get the stat info */ + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file cleanup failed: failed to fstat " + "file %s on %s ", + loc->path, from->name); + ret = -1; + goto out; + } - if (ret < 0) { - break; - } + /* Remove the sticky bit and sgid bit set, reset it to 0*/ + if (!src_ia_prot->sticky) + new_stbuf.ia_prot.sticky = 0; - offset += ret; - total += ret; + if (!src_ia_prot->sgid) + new_stbuf.ia_prot.sgid = 0; - GF_FREE (vector); - if (iobref) - iobref_unref (iobref); - iobref = NULL; - vector = NULL; - } - if (iobref) - iobref_unref (iobref); - GF_FREE (vector); + ret = syncop_fsetattr(from, fd, &new_stbuf, + (GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL, + NULL, NULL); - if (ret >= 0) - ret = 0; - else - ret = -1; + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file cleanup failed:" + "%s: failed to perform fsetattr on %s ", + loc->path, from->name); + ret = -1; + goto out; + } + + ret = syncop_fremovexattr(from, fd, conf->link_xattr_name, 0, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to remove linkto xattr on %s (%s)", loc->path, + from->name, strerror(-ret)); + ret = -1; + goto out; + } - if (xdata) { - dict_unref (xdata); - } + ret = 0; - return ret; +out: + return ret; } +/* + return values: -static int -__dht_rebalance_open_src_file (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, - struct iatt *stbuf, fd_t **src_fd, - gf_boolean_t *clean_src, int *fop_errno) + -1 : failure + 0 : successfully migrated data + 1 : not a failure, but we can't migrate data as of now +*/ +int +dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + int flag, int *fop_errno) { + int ret = -1; + struct iatt new_stbuf = { + 0, + }; + struct iatt stbuf = { + 0, + }; + struct iatt empty_iatt = { + 0, + }; + ia_prot_t src_ia_prot = { + 0, + }; + fd_t *src_fd = NULL; + fd_t *dst_fd = NULL; + dict_t *dict = NULL; + dict_t *xattr = NULL; + dict_t *xattr_rsp = NULL; + int file_has_holes = 0; + dht_conf_t *conf = this->private; + int rcvd_enoent_from_src = 0; + struct gf_flock flock = { + 0, + }; + struct gf_flock plock = { + 0, + }; + loc_t tmp_loc = { + 0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t inodelk_locked = _gf_false; + gf_boolean_t entrylk_locked = _gf_false; + gf_boolean_t p_locked = _gf_false; + int lk_ret = -1; + gf_defrag_info_t *defrag = NULL; + 
gf_boolean_t clean_src = _gf_false; + gf_boolean_t clean_dst = _gf_false; + int log_level = GF_LOG_INFO; + gf_boolean_t delete_src_linkto = _gf_true; + lock_migration_info_t locklist; + dict_t *meta_dict = NULL; + gf_boolean_t meta_locked = _gf_false; + gf_boolean_t target_changed = _gf_false; + xlator_t *new_target = NULL; + xlator_t *old_target = NULL; + xlator_t *hashed_subvol = NULL; + fd_t *linkto_fd = NULL; + + if (from == to) { + gf_msg_debug(this->name, 0, + "destination and source are same. file %s" + " might have migrated already", + loc->path); + ret = 0; + goto out; + } - int ret = 0; - fd_t *fd = NULL; - dict_t *dict = NULL; - struct iatt iatt = {0,}; - dht_conf_t *conf = NULL; + /* If defrag is NULL, it should be assumed that migration is triggered + * from client */ + defrag = conf->defrag; - conf = this->private; + /* migration of files from clients is restricted to non-tiered clients + * for now */ + if (!defrag && dht_is_tier_xlator(this)) { + ret = ENOTSUP; + goto out; + } - *clean_src = _gf_false; + if (defrag && defrag->tier_conf.is_tier) + log_level = GF_LOG_TRACE; - fd = fd_create (loc->inode, DHT_REBALANCE_PID); - if (!fd) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: fd create failed (source)", loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } + gf_log(this->name, log_level, "%s: attempting to move from %s to %s", + loc->path, from->name, to->name); - ret = syncop_open (from, loc, O_RDWR, fd, NULL, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to open file %s on %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; + dict = dict_new(); + if (!dict) { + ret = -1; + *fop_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "Could not allocate memory for dict"); + goto out; + } + ret = dict_set_int32(dict, conf->link_xattr_name, 256); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to set 'linkto' key in dict", + loc->path); + goto out; + } + + /* Do not migrate file in case lock migration is not enabled on the + * volume*/ + if (!conf->lock_migration_enabled) { + ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t)); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: failed to " + "set " GLUSTERFS_POSIXLK_COUNT " key in dict", + loc->path); + goto out; + } + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "locks will be migrated" + " for file: %s", + loc->path); + } + + ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno); + if (ret < 0) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to build parent loc, which is needed to " + "acquire entrylk to synchronize with renames on this " + "path. Skipping migration", + loc->path); + goto out; + } + + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (hashed_subvol == NULL) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: cannot find hashed subvol which is needed to " + "synchronize with renames on this path. 
" + "Skipping migration", + loc->path); + goto out; + } + + flock.l_type = F_WRLCK; + + tmp_loc.inode = inode_ref(loc->inode); + gf_uuid_copy(tmp_loc.gfid, loc->gfid); + tmp_loc.path = gf_strdup(loc->path); + + /* this inodelk happens with flock.owner being zero. But to synchronize + * hardlink migration we need to have different lkowner for each migration + * Filed a bug here: https://bugzilla.redhat.com/show_bug.cgi?id=1468202 to + * track the fix for this. Currently synclock takes care of synchronizing + * hardlink migration. Once this bug is fixed we can avoid taking synclock + */ + ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW, + &flock, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "migrate file failed: " + "%s: failed to lock file on %s", + loc->path, from->name); + goto out; + } + + inodelk_locked = _gf_true; + + /* dht_rename has changed to use entrylk on hashed subvol for + * synchronization. So, rebalance too has to acquire an entrylk on + * hashed subvol. + */ + ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, &parent_loc, + loc->name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to acquire entrylk on subvol %s", loc->path, + hashed_subvol->name); + goto out; + } + + entrylk_locked = _gf_true; + + /* Phase 1 - Data migration is in progress from now on */ + ret = syncop_lookup(from, loc, &stbuf, NULL, dict, &xattr_rsp); + if (ret) { + *fop_errno = -ret; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: lookup failed on %s", + loc->path, from->name); + goto out; + } + + /* preserve source mode, so set the same to the destination */ + src_ia_prot = stbuf.ia_prot; + + /* Check if file can be migrated */ + ret = __is_file_migratable(this, loc, &stbuf, xattr_rsp, flag, defrag, conf, + fop_errno); + if (ret) { + if (ret == HARDLINK_MIG_INPROGRESS) + ret = 0; + goto out; + } + + /* Take care of the special files */ + if (!IA_ISREG(stbuf.ia_type)) { + /* Special files */ + ret = migrate_special_files(this, from, to, loc, &stbuf, fop_errno); + goto out; + } + + /* create the destination, with required modes/xattr */ + ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd, + fop_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Create dst failed" + " on - %s for file - %s", + to->name, loc->path); + goto out; + } + + clean_dst = _gf_true; + + ret = __dht_check_free_space(this, to, from, loc, &stbuf, flag, conf, + &target_changed, &new_target, fop_errno); + if (target_changed) { + /* Can't handle for hardlinks. Marking this as failure */ + if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_INSUFF_SPACE, + "Exiting migration for" + " file - %s. 
flag - %d, stbuf.ia_nlink - %d", + loc->path, flag, stbuf.ia_nlink); + ret = -1; + goto out; + } + + ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform truncate on %s (%s)", loc->path, + to->name, strerror(-ret)); } - fd_bind (fd); + syncop_close(dst_fd); + dst_fd = NULL; + + old_target = to; + to = new_target; + + clean_dst = _gf_false; - if (src_fd) - *src_fd = fd; + /* if the file migration is successful to this new target, then + * update the xattr on the old destination to point the new + * destination. We need to do update this only post migration + * as in case of failure the linkto needs to point to the source + * subvol */ + ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, + &dst_fd, fop_errno); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Create dst failed" + " on - %s for file - %s", + to->name, loc->path); + goto out; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "destination for file " + "- %s is changed to - %s", + loc->path, to->name); + clean_dst = _gf_true; + } + } + + if (ret) { + goto out; + } + + /* Open the source, and also update mode/xattr */ + ret = __dht_rebalance_open_src_file(this, from, to, loc, &stbuf, &src_fd, + &clean_src, fop_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: failed to open %s on %s", loc->path, + from->name); + goto out; + } + + /* TODO: move all xattr related operations to fd based operations */ + ret = syncop_listxattr(from, loc, &xattr, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to get xattr from %s", + loc->path, from->name); + ret = -1; + goto out; + } + + /* Copying posix acls to the linkto file messes up the permissions*/ + dht_strip_out_acls(xattr); + + /* Remove the linkto xattr as we don't want to overwrite the value + * set on the dst. 
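Before any data moves, the code above lists every xattr on the source, strips the POSIX ACL keys and the internal linkto key, and applies the remainder to the destination, since wrong shard xattrs on the destination could corrupt data. A local sketch of copy-all-xattrs-except-internal-names using listxattr, getxattr and setxattr; the skip list and fixed buffer sizes are illustrative:

    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    /* Copy every xattr from src to dst except names treated as internal.
     * Fixed buffer sizes keep the sketch short; production code would
     * retry with the sizes reported by listxattr/getxattr. */
    static int copy_xattrs(const char *src, const char *dst)
    {
        static const char *skip[] = { "user.demo.linkto",
                                      "system.posix_acl_access" };
        char names[4096], value[4096];

        ssize_t len = listxattr(src, names, sizeof(names));
        if (len < 0)
            return -1;

        for (ssize_t off = 0; off < len; off += strlen(names + off) + 1) {
            const char *name = names + off;
            int skipped = 0;

            for (size_t i = 0; i < sizeof(skip) / sizeof(skip[0]); i++)
                if (strcmp(name, skip[i]) == 0)
                    skipped = 1;
            if (skipped)
                continue;

            ssize_t vlen = getxattr(src, name, value, sizeof(value));
            if (vlen < 0 || setxattr(dst, name, value, vlen, 0) < 0)
                return -1;
        }
        return 0;
    }

    int main(int argc, char **argv)
    {
        return (argc == 3 && copy_xattrs(argv[1], argv[2]) == 0) ? 0 : 1;
    }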
+ */ + dict_del(xattr, conf->link_xattr_name); + + /* We need to error out if this fails as having the wrong shard xattrs + * set on the dst could cause data corruption + */ + ret = syncop_fsetxattr(to, dst_fd, xattr, 0, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to set xattr on %s", loc->path, to->name); + ret = -1; + goto out; + } + if (xattr_rsp) { + /* we no more require this key */ + dict_del(dict, conf->link_xattr_name); + dict_unref(xattr_rsp); + } + + ret = syncop_fstat(from, src_fd, &stbuf, dict, &xattr_rsp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:failed to lookup %s on %s ", loc->path, + from->name); + *fop_errno = -ret; ret = -1; - dict = dict_new (); - if (!dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: Could not allocate memory for dict", loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; + goto out; + } + + /* Check again if file has hardlink */ + ret = __check_file_has_hardlink(this, loc, &stbuf, xattr_rsp, flag, defrag, + conf, fop_errno); + if (ret) { + if (ret == HARDLINK_MIG_INPROGRESS) + ret = 0; + goto out; + } + /* Try to preserve 'holes' while migrating data */ + if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) + file_has_holes = 1; + + ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes, + fop_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: failed to migrate data", loc->path); + + ret = -1; + goto out; + } + + /* TODO: Sync the locks */ + + ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", + loc->path, to->name, strerror(-ret)); + *fop_errno = -ret; + } + + /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */ + + ret = syncop_fstat(from, src_fd, &new_stbuf, NULL, NULL); + if (ret < 0) { + /* Failed to get the stat info */ + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: failed to fstat file %s on %s ", loc->path, + from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* Lock the entire source file to prevent clients from taking a + lock on it as dht_lk does not handle file migration. + + This still leaves a small window where conflicting locks can + be granted to different clients. If client1 requests a blocking + lock on the src file, it will be granted after the migrating + process releases its lock. If client2 requests a lock on the dst + data file, it will also be granted, but all FOPs will be redirected + to the dst data file. 
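The hole check above marks a file as sparse when its logical size exceeds its allocated blocks multiplied by the 512-byte sector size, so the write path can try to preserve holes. The same heuristic with stat(2), where st_blocks is already reported in 512-byte units:

    #include <stdio.h>
    #include <sys/stat.h>

    /* A file is "probably sparse" if it claims more bytes than its
     * allocated blocks can hold; st_blocks is counted in 512-byte units. */
    static int file_has_holes(const char *path)
    {
        struct stat st;

        if (stat(path, &st) != 0)
            return -1;
        return st.st_size > st.st_blocks * 512;
    }

    int main(int argc, char **argv)
    {
        if (argc != 2)
            return 1;
        printf("%s: holes=%d\n", argv[1], file_has_holes(argv[1]));
        return 0;
    }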
+ */ + + /* Take meta lock */ + + if (conf->lock_migration_enabled) { + meta_dict = dict_new(); + if (!meta_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "dict_new failed"); + + *fop_errno = ENOMEM; + ret = -1; + goto out; } - ret = dict_set_str (dict, conf->link_xattr_name, to->name); + ret = dict_set_str(meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to set xattr in dict for %s (linkto:%s)", - loc->path, to->name); - *fop_errno = ENOMEM; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s," + " path = %s", + GLUSTERFS_INTERNAL_FOP_KEY, loc->path); + *fop_errno = ENOMEM; + ret = -1; + goto out; } - /* Once the migration starts, the source should have 'linkto' key set - to show which is the target, so other clients can work around it */ - ret = syncop_setxattr (from, loc, dict, 0, NULL, NULL); + ret = dict_set_int32(meta_dict, GF_META_LOCK_KEY, 1); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to set xattr on %s in %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace dict_set failed"); + *fop_errno = ENOMEM; + ret = -1; + goto out; } - /* Reset source mode/xattr if migration fails*/ - *clean_src = _gf_true; + ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace syncop_setxattr metalock failed"); + + *fop_errno = -ret; + ret = -1; + goto out; + } else { + meta_locked = _gf_true; + } + } - /* mode should be (+S+T) to indicate migration is in progress */ - iatt.ia_prot = stbuf->ia_prot; - iatt.ia_type = stbuf->ia_type; - iatt.ia_prot.sticky = 1; - iatt.ia_prot.sgid = 1; + if (!conf->lock_migration_enabled) { + plock.l_type = F_WRLCK; + plock.l_start = 0; + plock.l_len = 0; + plock.l_whence = SEEK_SET; - ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL, - NULL, NULL); + ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to set mode on %s in %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: Failed to lock on %s", + loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; } - /* success */ - ret = 0; -out: - if (dict) - dict_unref (dict); + p_locked = _gf_true; - return ret; -} + } else { + INIT_LIST_HEAD(&locklist.list); -int -migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, - struct iatt *buf, int *fop_errno) -{ - int ret = -1; - dict_t *rsp_dict = NULL; - dict_t *dict = NULL; - char *link = NULL; - struct iatt stbuf = {0,}; - dht_conf_t *conf = this->private; - - dict = dict_new (); - if (!dict) { - *fop_errno = ENOMEM; + ret = syncop_getactivelk(from, loc, &locklist, NULL, NULL); + if (ret == 0) { + gf_log(this->name, GF_LOG_INFO, "No active locks on:%s", loc->path); + + } else if (ret > 0) { + ret = syncop_setactivelk(to, loc, &locklist, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_LOCK_MIGRATION_FAILED, "write lock failed on:%s", + loc->path); + + *fop_errno = -ret; ret = -1; - goto out; - } - ret = dict_set_int32 (dict, conf->link_xattr_name, 256); + goto 
metaunlock; + } + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_LOCK_MIGRATION_FAILED, + "getactivelk failed for file: %s", loc->path); + *fop_errno = -ret; + } + } + + /* source would have both sticky bit and sgid bit set, reset it to 0, + and set the source permission on destination, if it was not set + prior to setting rebalance-modes in source */ + if (!src_ia_prot.sticky) + new_stbuf.ia_prot.sticky = 0; + + if (!src_ia_prot.sgid) + new_stbuf.ia_prot.sgid = 0; + + /* TODO: if the source actually had sticky bit, or sgid bit set, + we are not handling it */ + + ret = syncop_fsetattr( + to, dst_fd, &new_stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL, + NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to perform setattr on %s ", + loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto metaunlock; + } + + /* Because 'futimes' is not portable */ + ret = syncop_setattr(to, loc, &new_stbuf, + (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), NULL, NULL, + NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform setattr on %s ", loc->path, to->name); + *fop_errno = -ret; + } + + if (target_changed) { + dict_del(dict, GLUSTERFS_POSIXLK_COUNT); + ret = dict_set_str(dict, conf->link_xattr_name, to->name); if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set xattr in dict for %s (linkto:%s)", loc->path, + to->name); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + ret = syncop_setxattr(old_target, loc, dict, 0, NULL, NULL); + if (ret && -ret != ESTALE && -ret != ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to set xattr on %s in %s", loc->path, + old_target->name); + *fop_errno = -ret; + ret = -1; + goto out; + } else if (-ret == ESTALE || -ret == ENOENT) { + /* The failure ESTALE indicates that the linkto + * file on the hashed subvol might have been deleted. 
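Once the data is in place, the code copies ownership and mode onto the destination fd with fsetattr and then, because futimes is not portable inside the stack, pushes mtime and atime through a path-based setattr. A local equivalent with fchown, fchmod and utimensat; the helper name and the arguments in main are placeholders:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>

    /* Mirror owner, permission bits and both timestamps of src_path onto
     * the already-open destination (dst_fd refers to dst_path). */
    static int sync_attrs(const char *src_path, int dst_fd, const char *dst_path)
    {
        struct stat st;

        if (stat(src_path, &st) != 0)
            return -1;
        if (fchown(dst_fd, st.st_uid, st.st_gid) != 0)
            return -1;
        if (fchmod(dst_fd, st.st_mode & 07777) != 0)
            return -1;

        /* atime first, then mtime, as utimensat expects. */
        struct timespec times[2] = { st.st_atim, st.st_mtim };
        return utimensat(AT_FDCWD, dst_path, times, 0);
    }

    int main(int argc, char **argv)
    {
        if (argc != 3)
            return 1;
        int fd = open(argv[2], O_RDWR);
        int ret = fd >= 0 ? sync_attrs(argv[1], fd, argv[2]) : -1;

        if (fd >= 0)
            close(fd);
        return ret == 0 ? 0 : 1;
    }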
+ * In this case will create a linkto file with new target + * as linkto xattr value*/ + linkto_fd = fd_create(loc->inode, DHT_REBALANCE_PID); + if (!linkto_fd) { + gf_msg(this->name, GF_LOG_ERROR, errno, + DHT_MSG_MIGRATE_FILE_FAILED, "%s: fd create failed", + loc->path); *fop_errno = ENOMEM; ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to set 'linkto' key in dict", loc->path); goto out; - } - - /* check in the destination if the file is link file */ - ret = syncop_lookup (to, loc, &stbuf, NULL, dict, &rsp_dict); - if ((ret < 0) && (-ret != ENOENT)) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: lookup failed", - loc->path); + } + ret = syncop_create(old_target, loc, O_RDWR, DHT_LINKFILE_MODE, + linkto_fd, NULL, dict, NULL); + if (ret != 0 && -ret != EEXIST && -ret != ESTALE) { *fop_errno = -ret; ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "failed to create linkto file on %s in %s", loc->path, + old_target->name); + goto out; + } else if (ret == 0) { + ret = syncop_fsetattr(old_target, linkto_fd, &stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL, + NULL, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "chown failed for %s on %s", loc->path, + old_target->name); + } + } + } + } + + clean_dst = _gf_false; + + /* Posix acls are not set on DHT linkto files as part of the initial + * initial xattrs set on the dst file, so these need + * to be set on the dst file after the linkto attrs are removed. + * TODO: Optimize this. + */ + if (xattr) { + dict_unref(xattr); + xattr = NULL; + } + + /* Set only the Posix ACLs this time */ + ret = syncop_getxattr(from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, NULL, + NULL); + if (ret < 0) { + if ((-ret != ENODATA) && (-ret != ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to get xattr from %s", + loc->path, from->name); + *fop_errno = -ret; + } + } else { + ret = syncop_setxattr(to, loc, xattr, 0, NULL, NULL); + if (ret < 0) { + /* Potential problem here where Posix ACLs will + * not be set on the target file */ + + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to set xattr on %s", + loc->path, to->name); + *fop_errno = -ret; + } + } + + /* store size of previous migrated file */ + if (defrag && defrag->tier_conf.is_tier) { + if (from != TIER_HASHED_SUBVOL) { + defrag->tier_conf.st_last_promoted_size = stbuf.ia_size; + } else { + /* Don't delete the linkto file on the hashed subvol */ + delete_src_linkto = _gf_false; + defrag->tier_conf.st_last_demoted_size = stbuf.ia_size; + } + } + + /* The src file is being unlinked after this so we don't need + to clean it up */ + clean_src = _gf_false; + + /* Make the source as a linkfile first before deleting it */ + empty_iatt.ia_prot.sticky = 1; + ret = syncop_fsetattr(from, src_fd, &empty_iatt, GF_SET_ATTR_MODE, NULL, + NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to perform setattr on %s ", + loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto metaunlock; + } + + /* Free up the data blocks on the source node, as the whole + file is migrated */ + ret = syncop_ftruncate(from, src_fd, 0, NULL, NULL, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform truncate 
on %s (%s)", loc->path, + from->name, strerror(-ret)); + *fop_errno = -ret; + } + + /* remove the 'linkto' xattr from the destination */ + ret = syncop_fremovexattr(to, dst_fd, conf->link_xattr_name, 0, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform removexattr on %s (%s)", loc->path, + to->name, strerror(-ret)); + *fop_errno = -ret; + } + + /* Do a stat and check the gfid before unlink */ + + /* + * Cached file changes its state from non-linkto to linkto file after + * migrating data. If lookup from any other mount-point is performed, + * converted-linkto-cached file will be treated as a stale and will be + * unlinked. But by this time, file is already migrated. So further + * failure because of ENOENT should not be treated as error + */ + + ret = syncop_stat(from, loc, &empty_iatt, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to do a stat on %s", loc->path, from->name); + + if (-ret != ENOENT) { + *fop_errno = -ret; + ret = -1; + goto metaunlock; + } + + rcvd_enoent_from_src = 1; + } + + if ((gf_uuid_compare(empty_iatt.ia_gfid, loc->gfid) == 0) && + (!rcvd_enoent_from_src) && delete_src_linkto) { + /* take out the source from namespace */ + ret = syncop_unlink(from, loc, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to perform unlink on %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto metaunlock; } + } - /* we no more require this key */ - dict_del (dict, conf->link_xattr_name); + ret = syncop_lookup(this, loc, NULL, NULL, NULL, NULL); + if (ret) { + gf_msg_debug(this->name, -ret, + "%s: failed to lookup the file on subvolumes", loc->path); + *fop_errno = -ret; + } - /* file exists in target node, only if it is 'linkfile' its valid, - otherwise, error out */ - if (!ret) { - if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict, - conf->link_xattr_name)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: file exists in destination", loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } + gf_msg(this->name, log_level, 0, DHT_MSG_MIGRATE_FILE_COMPLETE, + "completed migration of %s from subvolume %s to %s", loc->path, + from->name, to->name); - /* as file is linkfile, delete it */ - ret = syncop_unlink (to, loc, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to delete the linkfile", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } - } + ret = 0; - /* Set the gfid of the source file in dict */ - ret = dict_set_gfuuid (dict, "gfid-req", buf->ia_gfid, true); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to set gfid in dict for create", loc->path); - goto out; - } +metaunlock: - /* Create the file in target */ - if (IA_ISLNK (buf->ia_type)) { - /* Handle symlinks separately */ - ret = syncop_readlink (from, loc, &link, buf->ia_size, NULL, - NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: readlink on symlink failed", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } + if (conf->lock_migration_enabled && meta_locked) { + dict_del(meta_dict, GF_META_LOCK_KEY); - ret = syncop_symlink (to, loc, link, 0, dict, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: creating symlink failed", - loc->path); - *fop_errno = -ret; - ret 
= -1; - goto out; - } + ret = dict_set_int32(meta_dict, GF_META_UNLOCK_KEY, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace dict_set failed"); - goto done; + *fop_errno = ENOMEM; + ret = -1; + goto out; } - ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot, - buf->ia_type), - makedev (ia_major (buf->ia_rdev), - ia_minor (buf->ia_rdev)), 0, dict, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: mknod failed", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } + if (clean_dst == _gf_false) + ret = dict_set_int32(meta_dict, "status", 1); + else + ret = dict_set_int32(meta_dict, "status", 0); -done: - ret = syncop_setattr (to, loc, buf, - (GF_SET_ATTR_MTIME | - GF_SET_ATTR_UID | GF_SET_ATTR_GID | - GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to perform setattr on %s", - loc->path, to->name); - *fop_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace dict_set failed"); + + *fop_errno = ENOMEM; + ret = -1; + goto out; } - ret = syncop_unlink (from, loc, NULL, NULL); + ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: unlink failed", - loc->path); - *fop_errno = -ret; - ret = -1; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace syncop_setxattr meta unlock failed"); + + *fop_errno = -ret; + ret = -1; + goto out; } + } out: - GF_FREE (link); - if (dict) - dict_unref (dict); + if (clean_src) { + /* Revert source mode and xattr changes*/ + lk_ret = __dht_migration_cleanup_src_file(this, loc, src_fd, from, + &src_ia_prot); + if (lk_ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to cleanup source file on %s", loc->path, + from->name); + } + } + + /* reset the destination back to 0 */ + if (clean_dst) { + lk_ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL); + if (lk_ret) { + gf_msg(this->name, GF_LOG_ERROR, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: " + "%s: failed to reset target size back to 0", + loc->path); + } + } + + if (inodelk_locked) { + flock.l_type = F_UNLCK; + + lk_ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, + F_SETLK, &flock, NULL, NULL); + if (lk_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to unlock file on %s", loc->path, from->name); + } + } + + if (entrylk_locked) { + lk_ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, + &parent_loc, loc->name, ENTRYLK_UNLOCK, + ENTRYLK_UNLOCK, NULL, NULL); + if (lk_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to unlock entrylk on %s", loc->path, + hashed_subvol->name); + } + } + + if (p_locked) { + plock.l_type = F_UNLCK; + lk_ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL); + + if (lk_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to unlock file on %s", loc->path, from->name); + } + } + + if (!dht_is_tier_xlator(this)) { + lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, + NULL, NULL); + if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + 
GF_PROTECT_FROM_EXTERNAL_WRITES); + } + } - if (rsp_dict) - dict_unref (rsp_dict); + if (dict) + dict_unref(dict); - return ret; + if (xattr) + dict_unref(xattr); + if (xattr_rsp) + dict_unref(xattr_rsp); + + if (dst_fd) + syncop_close(dst_fd); + if (src_fd) + syncop_close(src_fd); + if (linkto_fd) + syncop_close(linkto_fd); + + loc_wipe(&tmp_loc); + loc_wipe(&parent_loc); + + return ret; } static int -__dht_migration_cleanup_src_file (xlator_t *this, loc_t *loc, fd_t *fd, - xlator_t *from, ia_prot_t *src_ia_prot) +rebalance_task(void *data) { - int ret = -1; - dht_conf_t *conf = NULL; - struct iatt new_stbuf = {0,}; + int ret = -1; + dht_local_t *local = NULL; + call_frame_t *frame = NULL; + int fop_errno = 0; - if (!this || !fd || !from || !src_ia_prot) { - goto out; - } + frame = data; - conf = this->private; + local = frame->local; - /*Revert source mode and xattr changes*/ - ret = syncop_fstat (from, fd, &new_stbuf, NULL, NULL); - if (ret < 0) { - /* Failed to get the stat info */ - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file cleanup failed: failed to fstat " - "file %s on %s ", loc->path, from->name); - ret = -1; - goto out; - } + /* This function is 'synchrounous', hence if it returns, + we are done with the task */ + ret = dht_migrate_file(THIS, &local->loc, local->rebalance.from_subvol, + local->rebalance.target_node, local->flags, + &fop_errno); + return ret; +} - /* Remove the sticky bit and sgid bit set, reset it to 0*/ - if (!src_ia_prot->sticky) - new_stbuf.ia_prot.sticky = 0; +static int +rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data) +{ + int32_t op_errno = EINVAL; - if (!src_ia_prot->sgid) - new_stbuf.ia_prot.sgid = 0; - - ret = syncop_fsetattr (from, fd, &new_stbuf, - (GF_SET_ATTR_GID | GF_SET_ATTR_MODE), - NULL, NULL, NULL, NULL); - - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file cleanup failed:" - "%s: failed to perform fsetattr on %s ", - loc->path, from->name); - ret = -1; - goto out; - } - - ret = syncop_fremovexattr (from, fd, conf->link_xattr_name, 0, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to remove linkto xattr on %s (%s)", - loc->path, from->name, strerror (-ret)); - ret = -1; - goto out; - } - - ret = 0; - -out: - return ret; -} - - - -/* - return values: - - -1 : failure - 0 : successfully migrated data - 1 : not a failure, but we can't migrate data as of now -*/ -int -dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, - int flag, int *fop_errno) -{ - int ret = -1; - struct iatt new_stbuf = {0,}; - struct iatt stbuf = {0,}; - struct iatt empty_iatt = {0,}; - ia_prot_t src_ia_prot = {0,}; - fd_t *src_fd = NULL; - fd_t *dst_fd = NULL; - dict_t *dict = NULL; - dict_t *xattr = NULL; - dict_t *xattr_rsp = NULL; - int file_has_holes = 0; - dht_conf_t *conf = this->private; - int rcvd_enoent_from_src = 0; - struct gf_flock flock = {0, }; - struct gf_flock plock = {0, }; - loc_t tmp_loc = {0, }; - loc_t parent_loc = {0, }; - gf_boolean_t inodelk_locked = _gf_false; - gf_boolean_t entrylk_locked = _gf_false; - gf_boolean_t p_locked = _gf_false; - int lk_ret = -1; - gf_defrag_info_t *defrag = NULL; - gf_boolean_t clean_src = _gf_false; - gf_boolean_t clean_dst = _gf_false; - int log_level = GF_LOG_INFO; - gf_boolean_t delete_src_linkto = _gf_true; - lock_migration_info_t locklist; - dict_t *meta_dict = NULL; - gf_boolean_t meta_locked = _gf_false; - gf_boolean_t target_changed = 
_gf_false; - xlator_t *new_target = NULL; - xlator_t *old_target = NULL; - xlator_t *hashed_subvol = NULL; - fd_t *linkto_fd = NULL; - - - if (from == to) { - gf_msg_debug (this->name, 0, "destination and source are same. file %s" - " might have migrated already", loc->path); - ret = 0; - goto out; - } - - /* If defrag is NULL, it should be assumed that migration is triggered - * from client */ - defrag = conf->defrag; - - /* migration of files from clients is restricted to non-tiered clients - * for now */ - if (!defrag && dht_is_tier_xlator (this)) { - ret = ENOTSUP; - goto out; - } - - if (defrag && defrag->tier_conf.is_tier) - log_level = GF_LOG_TRACE; - - gf_log (this->name, - log_level, "%s: attempting to move from %s to %s", - loc->path, from->name, to->name); - - dict = dict_new (); - if (!dict) { - ret = -1; - *fop_errno = ENOMEM; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "Could not allocate memory for dict"); - goto out; - } - ret = dict_set_int32 (dict, conf->link_xattr_name, 256); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to set 'linkto' key in dict", loc->path); - goto out; - } - - /* Do not migrate file in case lock migration is not enabled on the - * volume*/ - if (!conf->lock_migration_enabled) { - ret = dict_set_int32 (dict, - GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t)); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s: failed to " - "set "GLUSTERFS_POSIXLK_COUNT" key in dict", - loc->path); - goto out; - } - } else { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "locks will be migrated" - " for file: %s", loc->path); - } - - ret = dht_build_parent_loc (this, &parent_loc, loc, fop_errno); - if (ret < 0) { - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to build parent loc, which is needed to " - "acquire entrylk to synchronize with renames on this " - "path. Skipping migration", loc->path); - goto out; - } - - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (hashed_subvol == NULL) { - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: cannot find hashed subvol which is needed to " - "synchronize with renames on this path. " - "Skipping migration", loc->path); - goto out; - } - - flock.l_type = F_WRLCK; - - tmp_loc.inode = inode_ref (loc->inode); - gf_uuid_copy (tmp_loc.gfid, loc->gfid); - tmp_loc.path = gf_strdup(loc->path); - - /* this inodelk happens with flock.owner being zero. But to synchronize - * hardlink migration we need to have different lkowner for each migration - * Filed a bug here: https://bugzilla.redhat.com/show_bug.cgi?id=1468202 to - * track the fix for this. Currently synclock takes care of synchronizing - * hardlink migration. Once this bug is fixed we can avoid taking synclock */ - ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW, - &flock, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "migrate file failed: " - "%s: failed to lock file on %s", - loc->path, from->name); - goto out; - } - - inodelk_locked = _gf_true; - - /* dht_rename has changed to use entrylk on hashed subvol for - * synchronization. So, rebalance too has to acquire an entrylk on - * hashed subvol. 
- */ - ret = syncop_entrylk (hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, &parent_loc, - loc->name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, - NULL); - if (ret < 0) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to acquire entrylk on subvol %s", - loc->path, hashed_subvol->name); - goto out; - } - - entrylk_locked = _gf_true; - - /* Phase 1 - Data migration is in progress from now on */ - ret = syncop_lookup (from, loc, &stbuf, NULL, dict, &xattr_rsp); - if (ret) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: lookup failed on %s", - loc->path, from->name); - goto out; - } - - /* preserve source mode, so set the same to the destination */ - src_ia_prot = stbuf.ia_prot; - - /* Check if file can be migrated */ - ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag, conf, - fop_errno); - if (ret) { - if (ret == HARDLINK_MIG_INPROGRESS) - ret = 0; - goto out; - } - - /* Take care of the special files */ - if (!IA_ISREG (stbuf.ia_type)) { - /* Special files */ - ret = migrate_special_files (this, from, to, loc, &stbuf, - fop_errno); - goto out; - } - - /* create the destination, with required modes/xattr */ - ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, - &dst_fd, fop_errno); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed" - " on - %s for file - %s", to->name, loc->path); - goto out; - } - - clean_dst = _gf_true; - - ret = __dht_check_free_space (this, to, from, loc, &stbuf, flag, conf, - &target_changed, &new_target, fop_errno); - if (target_changed) { - /* Can't handle for hardlinks. Marking this as failure */ - if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SUBVOL_INSUFF_SPACE, "Exiting migration for" - " file - %s. flag - %d, stbuf.ia_nlink - %d", - loc->path, flag, stbuf.ia_nlink); - ret = -1; - goto out; - } - - - ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform truncate on %s (%s)", - loc->path, to->name, strerror (-ret)); - } - - syncop_close (dst_fd); - dst_fd = NULL; - - old_target = to; - to = new_target; - - clean_dst = _gf_false; - - - /* if the file migration is successful to this new target, then - * update the xattr on the old destination to point the new - * destination. 
We need to do update this only post migration - * as in case of failure the linkto needs to point to the source - * subvol */ - ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, - &dst_fd, fop_errno); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Create dst failed" - " on - %s for file - %s", to->name, loc->path); - goto out; - } else { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "destination for file " - "- %s is changed to - %s", loc->path, to->name); - clean_dst = _gf_true; - } - } - - if (ret) { - goto out; - } - - /* Open the source, and also update mode/xattr */ - ret = __dht_rebalance_open_src_file (this, from, to, loc, &stbuf, &src_fd, - &clean_src, fop_errno); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: failed to open %s on %s", - loc->path, from->name); - goto out; - } - - /* TODO: move all xattr related operations to fd based operations */ - ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to get xattr from %s", - loc->path, from->name); - ret = -1; - goto out; - } - - /* Copying posix acls to the linkto file messes up the permissions*/ - dht_strip_out_acls (xattr); - - /* Remove the linkto xattr as we don't want to overwrite the value - * set on the dst. - */ - dict_del (xattr, conf->link_xattr_name); - - /* We need to error out if this fails as having the wrong shard xattrs - * set on the dst could cause data corruption - */ - ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to set xattr on %s", - loc->path, to->name); - ret = -1; - goto out; - } - - if (xattr_rsp) { - /* we no more require this key */ - dict_del (dict, conf->link_xattr_name); - dict_unref (xattr_rsp); - } - - ret = syncop_fstat (from, src_fd, &stbuf, dict, &xattr_rsp); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:failed to lookup %s on %s ", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - - /* Check again if file has hardlink */ - ret = __check_file_has_hardlink (this, loc, &stbuf, xattr_rsp, - flag, defrag, conf, fop_errno); - if (ret) { - if (ret == HARDLINK_MIG_INPROGRESS) - ret = 0; - goto out; - } - /* Try to preserve 'holes' while migrating data */ - if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) - file_has_holes = 1; - - - ret = __dht_rebalance_migrate_data (this, defrag, from, to, - src_fd, dst_fd, stbuf.ia_size, - file_has_holes, fop_errno); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s: failed to migrate data", - loc->path); - - ret = -1; - goto out; - } - - /* TODO: Sync the locks */ - - ret = syncop_fsync (to, dst_fd, 0, NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to fsync on %s (%s)", - loc->path, to->name, strerror (-ret)); - *fop_errno = -ret; - } - - - /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */ - - ret = syncop_fstat (from, src_fd, &new_stbuf, NULL, NULL); - if (ret < 0) { - /* Failed to get the stat info */ - gf_msg ( this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: failed to fstat file %s on %s ", - loc->path, from->name); - *fop_errno = 
-ret; - ret = -1; - goto out; - } - - /* Lock the entire source file to prevent clients from taking a - lock on it as dht_lk does not handle file migration. - - This still leaves a small window where conflicting locks can - be granted to different clients. If client1 requests a blocking - lock on the src file, it will be granted after the migrating - process releases its lock. If client2 requests a lock on the dst - data file, it will also be granted, but all FOPs will be redirected - to the dst data file. - */ - - /* Take meta lock */ - - if (conf->lock_migration_enabled) { - meta_dict = dict_new (); - if (!meta_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "dict_new failed"); - - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = dict_set_str (meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value: key = %s," - " path = %s", GLUSTERFS_INTERNAL_FOP_KEY, - loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = dict_set_int32 (meta_dict, GF_META_LOCK_KEY, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace dict_set failed"); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (from, loc, meta_dict, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace syncop_setxattr metalock failed"); - - *fop_errno = -ret; - ret = -1; - goto out; - } else { - meta_locked = _gf_true; - } - } - - if (!conf->lock_migration_enabled) { - plock.l_type = F_WRLCK; - plock.l_start = 0; - plock.l_len = 0; - plock.l_whence = SEEK_SET; - - ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: Failed to lock on %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - - p_locked = _gf_true; - - } else { - - INIT_LIST_HEAD (&locklist.list); - - ret = syncop_getactivelk (from, loc, &locklist, NULL, NULL); - if (ret == 0) { - gf_log (this->name, GF_LOG_INFO, "No active locks on:%s" - , loc->path); - - } else if (ret > 0) { - - ret = syncop_setactivelk (to, loc, &locklist, NULL, - NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOCK_MIGRATION_FAILED, - "write lock failed on:%s", loc->path); - - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOCK_MIGRATION_FAILED, - "getactivelk failed for file: %s", loc->path); - *fop_errno = -ret; - } - } - - - /* source would have both sticky bit and sgid bit set, reset it to 0, - and set the source permission on destination, if it was not set - prior to setting rebalance-modes in source */ - if (!src_ia_prot.sticky) - new_stbuf.ia_prot.sticky = 0; - - if (!src_ia_prot.sgid) - new_stbuf.ia_prot.sgid = 0; - - /* TODO: if the source actually had sticky bit, or sgid bit set, - we are not handling it */ - - ret = syncop_fsetattr (to, dst_fd, &new_stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID | - GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to perform setattr on %s ", - loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - - /* Because 'futimes' is not portable */ - ret = syncop_setattr (to, loc, &new_stbuf, - 
(GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), - NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform setattr on %s ", - loc->path, to->name); - *fop_errno = -ret; - } - - if (target_changed) { - dict_del (dict, GLUSTERFS_POSIXLK_COUNT); - ret = dict_set_str (dict, conf->link_xattr_name, to->name); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to set xattr in dict for %s (linkto:%s)", - loc->path, to->name); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (old_target, loc, dict, 0, NULL, NULL); - if (ret && -ret != ESTALE && -ret != ENOENT) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to set xattr on %s in %s", - loc->path, old_target->name); - *fop_errno = -ret; - ret = -1; - goto out; - } else if (-ret == ESTALE || -ret == ENOENT) { - /* The failure ESTALE indicates that the linkto - * file on the hashed subvol might have been deleted. - * In this case will create a linkto file with new target - * as linkto xattr value*/ - linkto_fd = fd_create (loc->inode, DHT_REBALANCE_PID); - if (!linkto_fd) { - gf_msg (this->name, GF_LOG_ERROR, errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: fd create failed", loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - ret = syncop_create (old_target, loc, O_RDWR, - DHT_LINKFILE_MODE, linkto_fd, - NULL, dict, NULL); - if (ret != 0 && -ret != EEXIST && -ret != ESTALE) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to create linkto file on %s in %s", - loc->path, old_target->name); - goto out; - } else if (ret == 0) { - ret = syncop_fsetattr (old_target, linkto_fd, &stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID), - NULL, NULL, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "chown failed for %s on %s", - loc->path, old_target->name); - } - } - } - } - - clean_dst = _gf_false; - - /* Posix acls are not set on DHT linkto files as part of the initial - * initial xattrs set on the dst file, so these need - * to be set on the dst file after the linkto attrs are removed. - * TODO: Optimize this. 
- */ - if (xattr) { - dict_unref (xattr); - xattr = NULL; - } - - /* Set only the Posix ACLs this time */ - ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, - NULL, NULL); - if (ret < 0) { - if ((-ret != ENODATA) && (-ret != ENOATTR)) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to get xattr from %s", - loc->path, from->name); - *fop_errno = -ret; - } - } else { - ret = syncop_setxattr (to, loc, xattr, 0, NULL, NULL); - if (ret < 0) { - /* Potential problem here where Posix ACLs will - * not be set on the target file */ - - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to set xattr on %s", - loc->path, to->name); - *fop_errno = -ret; - } - } - - /* store size of previous migrated file */ - if (defrag && defrag->tier_conf.is_tier) { - if (from != TIER_HASHED_SUBVOL) { - defrag->tier_conf.st_last_promoted_size = stbuf.ia_size; - } else { - /* Don't delete the linkto file on the hashed subvol */ - delete_src_linkto = _gf_false; - defrag->tier_conf.st_last_demoted_size = stbuf.ia_size; - } - } - - /* The src file is being unlinked after this so we don't need - to clean it up */ - clean_src = _gf_false; - - /* Make the source as a linkfile first before deleting it */ - empty_iatt.ia_prot.sticky = 1; - ret = syncop_fsetattr (from, src_fd, &empty_iatt, - GF_SET_ATTR_MODE, NULL, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to perform setattr on %s ", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - - /* Free up the data blocks on the source node, as the whole - file is migrated */ - ret = syncop_ftruncate (from, src_fd, 0, NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform truncate on %s (%s)", - loc->path, from->name, strerror (-ret)); - *fop_errno = -ret; - } - - /* remove the 'linkto' xattr from the destination */ - ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name, 0, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform removexattr on %s (%s)", - loc->path, to->name, strerror (-ret)); - *fop_errno = -ret; - } - - /* Do a stat and check the gfid before unlink */ - - /* - * Cached file changes its state from non-linkto to linkto file after - * migrating data. If lookup from any other mount-point is performed, - * converted-linkto-cached file will be treated as a stale and will be - * unlinked. But by this time, file is already migrated. 
So further - * failure because of ENOENT should not be treated as error - */ - - ret = syncop_stat (from, loc, &empty_iatt, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to do a stat on %s", - loc->path, from->name); - - if (-ret != ENOENT) { - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - - rcvd_enoent_from_src = 1; - } - - - if ((gf_uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0 ) && - (!rcvd_enoent_from_src) && delete_src_linkto) { - /* take out the source from namespace */ - ret = syncop_unlink (from, loc, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to perform unlink on %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - } - - ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL); - if (ret) { - gf_msg_debug (this->name, -ret, - "%s: failed to lookup the file on subvolumes", - loc->path); - *fop_errno = -ret; - } - - gf_msg (this->name, log_level, 0, - DHT_MSG_MIGRATE_FILE_COMPLETE, - "completed migration of %s from subvolume %s to %s", - loc->path, from->name, to->name); - - ret = 0; - -metaunlock: - - if (conf->lock_migration_enabled && meta_locked) { - - dict_del (meta_dict, GF_META_LOCK_KEY); - - ret = dict_set_int32 (meta_dict, GF_META_UNLOCK_KEY, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace dict_set failed"); - - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - if (clean_dst == _gf_false) - ret = dict_set_int32 (meta_dict, "status", 1); - else - ret = dict_set_int32 (meta_dict, "status", 0); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace dict_set failed"); - - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (from, loc, meta_dict, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace syncop_setxattr meta unlock failed"); - - *fop_errno = -ret; - ret = -1; - goto out; - } - } - -out: - if (clean_src) { - /* Revert source mode and xattr changes*/ - lk_ret = __dht_migration_cleanup_src_file (this, loc, src_fd, - from, &src_ia_prot); - if (lk_ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to cleanup source file on %s", - loc->path, from->name); - } - } - - /* reset the destination back to 0 */ - if (clean_dst) { - lk_ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL, - NULL, NULL); - if (lk_ret) { - gf_msg (this->name, GF_LOG_ERROR, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: " - "%s: failed to reset target size back to 0", - loc->path); - } - } - - if (inodelk_locked) { - flock.l_type = F_UNLCK; - - lk_ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, - &tmp_loc, F_SETLK, &flock, NULL, NULL); - if (lk_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to unlock file on %s", - loc->path, from->name); - } - } - - if (entrylk_locked) { - lk_ret = syncop_entrylk (hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, - &parent_loc, loc->name, ENTRYLK_UNLOCK, - ENTRYLK_UNLOCK, NULL, NULL); - if (lk_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to unlock entrylk on %s", - loc->path, hashed_subvol->name); - } - } - - if (p_locked) { - plock.l_type = F_UNLCK; - lk_ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL); - - if (lk_ret < 0) { - gf_msg 
(this->name, GF_LOG_WARNING, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to unlock file on %s", - loc->path, from->name); - } - } - - if (!dht_is_tier_xlator (this)) { - lk_ret = syncop_removexattr (to, loc, - GF_PROTECT_FROM_EXTERNAL_WRITES, - NULL, NULL); - if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { - gf_msg (this->name, GF_LOG_WARNING, -lk_ret, 0, - "%s: removexattr failed key %s", loc->path, - GF_PROTECT_FROM_EXTERNAL_WRITES); - } - } - - if (dict) - dict_unref (dict); - - if (xattr) - dict_unref (xattr); - if (xattr_rsp) - dict_unref (xattr_rsp); - - if (dst_fd) - syncop_close (dst_fd); - if (src_fd) - syncop_close (src_fd); - if (linkto_fd) - syncop_close (linkto_fd); - - loc_wipe (&tmp_loc); - loc_wipe (&parent_loc); - - return ret; -} - -static int -rebalance_task (void *data) -{ - int ret = -1; - dht_local_t *local = NULL; - call_frame_t *frame = NULL; - int fop_errno = 0; - - frame = data; - - local = frame->local; - - /* This function is 'synchrounous', hence if it returns, - we are done with the task */ - ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol, - local->rebalance.target_node, local->flags, - &fop_errno); - - return ret; -} - -static int -rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data) -{ - int32_t op_errno = EINVAL; - - if (op_ret == -1) { - /* Failure of migration process, mostly due to write process. - as we can't preserve the exact errno, lets say there was - no space to migrate-data - */ - op_errno = ENOSPC; - } else if (op_ret == 1) { - /* migration didn't happen, but is not a failure, let the user - understand that he doesn't have permission to migrate the - file. - */ - op_ret = -1; - op_errno = EPERM; - } else if (op_ret != 0) { - op_errno = -op_ret; - op_ret = -1; - } - - DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL); - return 0; -} + if (op_ret == -1) { + /* Failure of migration process, mostly due to write process. + as we can't preserve the exact errno, lets say there was + no space to migrate-data + */ + op_errno = ENOSPC; + } else if (op_ret == 1) { + /* migration didn't happen, but is not a failure, let the user + understand that he doesn't have permission to migrate the + file. 
+ */ + op_ret = -1; + op_errno = EPERM; + } else if (op_ret != 0) { + op_errno = -op_ret; + op_ret = -1; + } + + DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL); + return 0; +} int -dht_start_rebalance_task (xlator_t *this, call_frame_t *frame) +dht_start_rebalance_task(xlator_t *this, call_frame_t *frame) { - int ret = -1; + int ret = -1; - ret = synctask_new (this->ctx->env, rebalance_task, - rebalance_task_completion, - frame, frame); - return ret; + ret = synctask_new(this->ctx->env, rebalance_task, + rebalance_task_completion, frame, frame); + return ret; } - int -gf_listener_stop (xlator_t *this) +gf_listener_stop(xlator_t *this) { - glusterfs_ctx_t *ctx = NULL; - cmd_args_t *cmd_args = NULL; - int ret = 0; - - ctx = this->ctx; - GF_ASSERT (ctx); - cmd_args = &ctx->cmd_args; - if (cmd_args->sock_file) { - ret = sys_unlink (cmd_args->sock_file); - if (ret && (ENOENT == errno)) { - ret = 0; - } - } - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - DHT_MSG_SOCKET_ERROR, - "Failed to unlink listener " - "socket %s", cmd_args->sock_file); - } - return ret; + glusterfs_ctx_t *ctx = NULL; + cmd_args_t *cmd_args = NULL; + int ret = 0; + + ctx = this->ctx; + GF_ASSERT(ctx); + cmd_args = &ctx->cmd_args; + if (cmd_args->sock_file) { + ret = sys_unlink(cmd_args->sock_file); + if (ret && (ENOENT == errno)) { + ret = 0; + } + } + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_SOCKET_ERROR, + "Failed to unlink listener " + "socket %s", + cmd_args->sock_file); + } + return ret; } void -dht_build_root_inode (xlator_t *this, inode_t **inode) +dht_build_root_inode(xlator_t *this, inode_t **inode) { - inode_table_t *itable = NULL; - uuid_t root_gfid = {0, }; + inode_table_t *itable = NULL; + uuid_t root_gfid = { + 0, + }; - itable = inode_table_new (0, this); - if (!itable) - return; + itable = inode_table_new(0, this); + if (!itable) + return; - root_gfid[15] = 1; - *inode = inode_find (itable, root_gfid); + root_gfid[15] = 1; + *inode = inode_find(itable, root_gfid); } void -dht_build_root_loc (inode_t *inode, loc_t *loc) +dht_build_root_loc(inode_t *inode, loc_t *loc) { - loc->path = "/"; - loc->inode = inode; - loc->inode->ia_type = IA_IFDIR; - memset (loc->gfid, 0, 16); - loc->gfid[15] = 1; + loc->path = "/"; + loc->inode = inode; + loc->inode->ia_type = IA_IFDIR; + memset(loc->gfid, 0, 16); + loc->gfid[15] = 1; } /* return values: 1 -> error, bug ignore and continue 0 -> proceed -1 -> error, handle it */ int32_t -gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag) +gf_defrag_handle_migrate_error(int32_t op_errno, gf_defrag_info_t *defrag) { - int ret = 0; - /* if errno is not ENOTCONN, we can still continue - with rebalance process */ - if (op_errno != ENOTCONN) { - ret = 1; - goto out; - } - - if (op_errno == ENOTCONN) { - /* Most probably mount point went missing (mostly due - to a brick down), say rebalance failure to user, - let him restart it if everything is fine */ - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } + int ret = 0; + /* if errno is not ENOTCONN, we can still continue + with rebalance process */ + if (op_errno != ENOTCONN) { + ret = 1; + goto out; + } + + if (op_errno == ENOTCONN) { + /* Most probably mount point went missing (mostly due + to a brick down), say rebalance failure to user, + let him restart it if everything is fine */ + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + ret = -1; + goto out; + } out: - return ret; + return ret; } static gf_boolean_t 
-gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size) -{ - gf_defrag_pattern_list_t *trav = NULL; - gf_boolean_t match = _gf_false; - gf_boolean_t ret = _gf_false; - - GF_VALIDATE_OR_GOTO ("dht", defrag, out); - - trav = defrag->defrag_pattern; - while (trav) { - if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) { - match = _gf_true; - break; - } - trav = trav->next; - } - - if ((match == _gf_true) && (size >= trav->size)) - ret = _gf_true; - - out: - return ret; -} - -int dht_dfreaddirp_done (dht_dfoffset_ctx_t *offset_var, int cnt) { - - int i; - int result = 1; - - for (i = 0; i < cnt; i++) { - if (offset_var[i].readdir_done == 0) { - result = 0; - break; - } - } - return result; -} - -int static -gf_defrag_ctx_subvols_init (dht_dfoffset_ctx_t *offset_var, xlator_t *this) { - - int i; - dht_conf_t *conf = NULL; - - conf = this->private; - - if (!conf) - return -1; - - for (i = 0; i < conf->local_subvols_cnt; i++) { - offset_var[i].this = conf->local_subvols[i]; - offset_var[i].offset = (off_t) 0; - offset_var[i].readdir_done = 0; - } - - return 0; -} - - -static int -dht_get_first_non_null_index (subvol_nodeuuids_info_t *entry) -{ - int i = 0; - int index = 0; - - for (i = 0; i < entry->count; i++) { - if (!gf_uuid_is_null (entry->elements[i].uuid)) { - index = i; - goto out; - } - } - - if (i == entry->count) { - index = -1; - } -out: - return index; -} - - -/* Return value - * 0 : this node does not migrate the file - * 1 : this node migrates the file - * - * Use the hash value of the gfid to determine which node will migrate files. - * Using the gfid instead of the name also ensures that the same node handles - * all hardlinks. - */ - -int -gf_defrag_should_i_migrate (xlator_t *this, int local_subvol_index, uuid_t gfid) -{ - int ret = 0; - int i = local_subvol_index; - char *str = NULL; - uint32_t hashval = 0; - int32_t index = 0; - dht_conf_t *conf = NULL; - char buf[UUID_CANONICAL_FORM_LEN + 1] = {0, }; - subvol_nodeuuids_info_t *entry = NULL; - - - conf = this->private; - - /* Pure distribute. A subvol in this case - will be handled by only one node */ - - entry = &(conf->local_nodeuuids[i]); - if (entry->count == 1) { - return 1; - } - - str = uuid_utoa_r (gfid, buf); - ret = dht_hash_compute (this, 0, str, &hashval); - if (ret == 0) { - index = (hashval % entry->count); - if (entry->elements[index].info - == REBAL_NODEUUID_MINE) { - /* Index matches this node's nodeuuid.*/ - ret = 1; - goto out; - } - - /* Brick down - some other node has to migrate these files*/ - if (gf_uuid_is_null (entry->elements[index].uuid)) { - /* Fall back to the first non-null index */ - index = dht_get_first_non_null_index (entry); - - if (index == -1) { - /* None of the bricks in the subvol are up. 
- * CHILD_DOWN will kill the process soon */ - - return 0; - } - - if (entry->elements[index].info == REBAL_NODEUUID_MINE) { - /* Index matches this node's nodeuuid.*/ - ret = 1; - goto out; - } - } - } -out: - return ret; -} - - -int -gf_defrag_migrate_single_file (void *opaque) +gf_defrag_pattern_match(gf_defrag_info_t *defrag, char *name, uint64_t size) { - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - int ret = 0; - gf_dirent_t *entry = NULL; - struct timeval start = {0,}; - loc_t entry_loc = {0,}; - loc_t *loc = NULL; - struct iatt iatt = {0,}; - dict_t *migrate_data = NULL; - struct timeval end = {0,}; - double elapsed = {0,}; - struct dht_container *rebal_entry = NULL; - inode_t *inode = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *cached_subvol = NULL; - call_frame_t *statfs_frame = NULL; - xlator_t *old_THIS = NULL; - data_t *tmp = NULL; - int fop_errno = 0; - gf_dht_migrate_data_type_t rebal_type = GF_DHT_MIGRATE_DATA; - char value[MAX_REBAL_TYPE_SIZE] = {0,}; - struct iatt *iatt_ptr = NULL; - gf_boolean_t update_skippedcount = _gf_true; - int i = 0; - - rebal_entry = (struct dht_container *)opaque; - if (!rebal_entry) { - gf_log ("DHT", GF_LOG_ERROR, "rebal_entry is NULL"); - ret = -1; - goto out; - } - - this = rebal_entry->this; - - conf = this->private; - - defrag = conf->defrag; - - loc = rebal_entry->parent_loc; - - migrate_data = rebal_entry->migrate_data; - - entry = rebal_entry->df_entry; - iatt_ptr = &entry->d_stat; - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - goto out; - } - - if (defrag->stats == _gf_true) { - gettimeofday (&start, NULL); - } - - if (defrag->defrag_pattern && - (gf_defrag_pattern_match (defrag, entry->d_name, - entry->d_stat.ia_size) == _gf_false)) { - gf_log (this->name, GF_LOG_ERROR, "pattern_match failed"); - goto out; - } - - memset (&entry_loc, 0, sizeof (entry_loc)); - - ret = dht_build_child_loc (this, &entry_loc, loc, entry->d_name); - if (ret) { - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - } - UNLOCK (&defrag->lock); - - ret = 0; - - gf_log (this->name, GF_LOG_ERROR, "Child loc build failed"); - - goto out; - } - - if (!gf_defrag_should_i_migrate (this, rebal_entry->local_subvol_index, - entry->d_stat.ia_gfid)) { - gf_msg_debug (this->name, 0, "Don't migrate %s ", - entry_loc.path); - goto out; - } - - gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); - - gf_uuid_copy (entry_loc.pargfid, loc->gfid); - - ret = syncop_lookup (this, &entry_loc, &iatt, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s lookup failed", - entry_loc.path); - - /* Increase failure count only for remove-brick op, so that - * user is warned to check the removed-brick for any files left - * unmigrated - */ - if (conf->decommission_subvols_cnt) { - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - } - UNLOCK (&defrag->lock); - } - - ret = 0; - goto out; - } - - iatt_ptr = &iatt; - - hashed_subvol = dht_subvol_get_hashed (this, &entry_loc); - if (!hashed_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get hashed subvol for %s", - entry_loc.path); - ret = 0; - goto out; - } - - cached_subvol = dht_subvol_get_cached (this, entry_loc.inode); - if (!cached_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_CACHED_SUBVOL_GET_FAILED, - "Failed to get cached subvol for %s", - entry_loc.path); - - ret = 0; - goto out; - } - - if 
(hashed_subvol == cached_subvol) { - ret = 0; - goto out; - } - - inode = inode_link (entry_loc.inode, entry_loc.parent, entry->d_name, &iatt); - inode_unref (entry_loc.inode); - /* use the inode returned by inode_link */ - entry_loc.inode = inode; - - old_THIS = THIS; - THIS = this; - statfs_frame = create_frame (this, this->ctx->pool); - if (!statfs_frame) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "Insufficient memory. Frame creation failed"); - ret = -1; - goto out; - } - - /* async statfs information for honoring min-free-disk */ - dht_get_du_info (statfs_frame, this, loc); - THIS = old_THIS; - - tmp = dict_get (migrate_data, GF_XATTR_FILE_MIGRATE_KEY); - if (tmp) { - memcpy (value, tmp->data, tmp->len); - if (strcmp (value, "force") == 0) - rebal_type = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS; - - if (conf->decommission_in_progress) - rebal_type = GF_DHT_MIGRATE_HARDLINK; - } - - ret = dht_migrate_file (this, &entry_loc, cached_subvol, - hashed_subvol, rebal_type, &fop_errno); - if (ret == 1) { - if (fop_errno == ENOSPC) { - gf_msg_debug (this->name, 0, "migrate-data skipped for" - " %s due to space constraints", - entry_loc.path); - - /* For remove-brick case if the source is not one of the - * removed-brick, do not mark the error as failure */ - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] == cached_subvol) { - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - update_skippedcount = _gf_false; - } - UNLOCK (&defrag->lock); - - break; - } - } - } - - if (update_skippedcount) { - LOCK (&defrag->lock); - { - defrag->skipped += 1; - } - UNLOCK (&defrag->lock); - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_MIGRATE_FILE_SKIPPED, - "File migration skipped for %s.", - entry_loc.path); - } - - } else if (fop_errno == ENOTSUP) { - gf_msg_debug (this->name, 0, "migrate-data skipped for" - " hardlink %s ", entry_loc.path); - LOCK (&defrag->lock); - { - defrag->skipped += 1; - } - UNLOCK (&defrag->lock); - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_MIGRATE_FILE_SKIPPED, - "File migration skipped for %s.", - entry_loc.path); - } + gf_defrag_pattern_list_t *trav = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t ret = _gf_false; - ret = 0; - goto out; - } else if (ret < 0) { - if (fop_errno != EEXIST) { - gf_msg (this->name, GF_LOG_ERROR, fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "migrate-data failed for %s", entry_loc.path); - - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - } - UNLOCK (&defrag->lock); - - } - - ret = gf_defrag_handle_migrate_error (fop_errno, defrag); - - if (!ret) { - gf_msg(this->name, GF_LOG_ERROR, fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "migrate-data on %s failed:", entry_loc.path); - } else if (ret == 1) { - ret = 0; - } - - goto out; - } - - LOCK (&defrag->lock); - { - defrag->total_files += 1; - defrag->total_data += iatt.ia_size; - } - UNLOCK (&defrag->lock); - - if (defrag->stats == _gf_true) { - gettimeofday (&end, NULL); - elapsed = (end.tv_sec - start.tv_sec) * 1e6 + - (end.tv_usec - start.tv_usec); - gf_log (this->name, GF_LOG_INFO, "Migration of " - "file:%s size:%"PRIu64" bytes took %.2f" - "secs and ret: %d", entry_loc.name, - iatt.ia_size, elapsed/1e6, ret); - } - -out: - if (statfs_frame) { - STACK_DESTROY (statfs_frame->root); - } - - if (iatt_ptr) { - LOCK (&defrag->lock); - { - defrag->size_processed += iatt_ptr->ia_size; - } - UNLOCK (&defrag->lock); - } - loc_wipe (&entry_loc); - - return ret; - -} + 
GF_VALIDATE_OR_GOTO("dht", defrag, out); -void * -gf_defrag_task (void *opaque) -{ - struct list_head *q_head = NULL; - struct dht_container *iterator = NULL; - gf_defrag_info_t *defrag = NULL; - int ret = 0; - pid_t pid = GF_CLIENT_PID_DEFRAG; - - defrag = (gf_defrag_info_t *)opaque; - if (!defrag) { - gf_msg ("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL"); - goto out; + trav = defrag->defrag_pattern; + while (trav) { + if (!fnmatch(trav->path_pattern, name, FNM_NOESCAPE)) { + match = _gf_true; + break; } + trav = trav->next; + } - syncopctx_setfspid (&pid); + if ((match == _gf_true) && (size >= trav->size)) + ret = _gf_true; - q_head = &(defrag->queue[0].list); +out: + return ret; +} - /* The following while loop will dequeue one entry from the defrag->queue - under lock. We will update the defrag->global_error only when there - is an error which is critical to stop the rebalance process. The stop - message will be intimated to other migrator threads by setting the - defrag->defrag_status to GF_DEFRAG_STATUS_FAILED. +int +dht_dfreaddirp_done(dht_dfoffset_ctx_t *offset_var, int cnt) +{ + int i; + int result = 1; - In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is - maintained so that crawler does not starve the file migration - workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that - crawler does not go far ahead in filling up the queue. - */ + for (i = 0; i < cnt; i++) { + if (offset_var[i].readdir_done == 0) { + result = 0; + break; + } + } + return result; +} - while (_gf_true) { +int static gf_defrag_ctx_subvols_init(dht_dfoffset_ctx_t *offset_var, + xlator_t *this) +{ + int i; + dht_conf_t *conf = NULL; - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - pthread_cond_broadcast ( - &defrag->rebalance_crawler_alarm); - pthread_cond_broadcast ( - &defrag->parallel_migration_cond); - goto out; - } + conf = this->private; - pthread_mutex_lock (&defrag->dfq_mutex); - { + if (!conf) + return -1; - /*Throttle down: - If the reconfigured count is less than current thread - count, then the current thread will sleep */ - - /*TODO: Need to refactor the following block to work - *under defrag->lock. For now access - * defrag->current_thread_count and rthcount under - * dfq_mutex lock */ - while (!defrag->crawl_done && - (defrag->recon_thread_count < - defrag->current_thread_count)) { - defrag->current_thread_count--; - gf_msg_debug ("DHT", 0, "Thread sleeping. " - "current thread count: %d", - defrag->current_thread_count); - - pthread_cond_wait ( - &defrag->df_wakeup_thread, - &defrag->dfq_mutex); - - defrag->current_thread_count++; - gf_msg_debug ("DHT", 0, "Thread wokeup. 
" - "current thread count: %d", - defrag->current_thread_count); + for (i = 0; i < conf->local_subvols_cnt; i++) { + offset_var[i].this = conf->local_subvols[i]; + offset_var[i].offset = (off_t)0; + offset_var[i].readdir_done = 0; + } - } + return 0; +} - if (defrag->q_entry_count) { - iterator = list_entry (q_head->next, - typeof(*iterator), list); - - gf_msg_debug ("DHT", 0, "picking entry " - "%s", iterator->df_entry->d_name); - - list_del_init (&(iterator->list)); - - defrag->q_entry_count--; - - if ((defrag->q_entry_count < - MIN_MIGRATE_QUEUE_COUNT) && - defrag->wakeup_crawler) { - pthread_cond_broadcast ( - &defrag->rebalance_crawler_alarm); - } - pthread_mutex_unlock (&defrag->dfq_mutex); - ret = gf_defrag_migrate_single_file - ((void *)iterator); - - /*Critical errors: ENOTCONN and ENOSPACE*/ - if (ret) { - dht_set_global_defrag_error - (defrag, ret); - - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; - - pthread_cond_broadcast ( - &defrag->rebalance_crawler_alarm); - - pthread_cond_broadcast ( - &defrag->parallel_migration_cond); - - goto out; - } - - gf_defrag_free_container (iterator); - - continue; - } else { - - /* defrag->crawl_done flag is set means crawling - file system is done and hence a list_empty when - the above flag is set indicates there are no more - entries to be added to the queue and rebalance is - finished */ - - if (!defrag->crawl_done) { - - defrag->current_thread_count--; - gf_msg_debug ("DHT", 0, "Thread " - "sleeping while waiting " - "for migration entries. " - "current thread count:%d", - defrag->current_thread_count); - - pthread_cond_wait ( - &defrag->parallel_migration_cond, - &defrag->dfq_mutex); - } - - if (defrag->crawl_done && - !defrag->q_entry_count) { - defrag->current_thread_count++; - gf_msg_debug ("DHT", 0, "Exiting thread"); - - pthread_cond_broadcast ( - &defrag->parallel_migration_cond); - goto unlock; - } else { - defrag->current_thread_count++; - gf_msg_debug ("DHT", 0, "Thread woke up" - " as found migrating entries. " - "current thread count:%d", - defrag->current_thread_count); - - pthread_mutex_unlock - (&defrag->dfq_mutex); - continue; - } - } +static int +dht_get_first_non_null_index(subvol_nodeuuids_info_t *entry) +{ + int i = 0; + int index = 0; - } -unlock: - pthread_mutex_unlock (&defrag->dfq_mutex); - break; + for (i = 0; i < entry->count; i++) { + if (!gf_uuid_is_null(entry->elements[i].uuid)) { + index = i; + goto out; } + } + + if (i == entry->count) { + index = -1; + } out: - return NULL; + return index; } -int static -gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container, - loc_t *loc, dht_conf_t *conf, gf_defrag_info_t *defrag, - fd_t *fd, dict_t *migrate_data, - struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, - int *should_commit_hash, int *perrno) +/* Return value + * 0 : this node does not migrate the file + * 1 : this node migrates the file + * + * Use the hash value of the gfid to determine which node will migrate files. + * Using the gfid instead of the name also ensures that the same node handles + * all hardlinks. 
+ */ + +int +gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) { - int ret = -1; - char is_linkfile = 0; - gf_dirent_t *df_entry = NULL; - struct dht_container *tmp_container = NULL; + int ret = 0; + int i = local_subvol_index; + char *str = NULL; + uint32_t hashval = 0; + int32_t index = 0; + dht_conf_t *conf = NULL; + char buf[UUID_CANONICAL_FORM_LEN + 1] = { + 0, + }; + subvol_nodeuuids_info_t *entry = NULL; + + conf = this->private; + + /* Pure distribute. A subvol in this case + will be handled by only one node */ + + entry = &(conf->local_nodeuuids[i]); + if (entry->count == 1) { + return 1; + } + + str = uuid_utoa_r(gfid, buf); + ret = dht_hash_compute(this, 0, str, &hashval); + if (ret == 0) { + index = (hashval % entry->count); + if (entry->elements[index].info == REBAL_NODEUUID_MINE) { + /* Index matches this node's nodeuuid.*/ + ret = 1; + goto out; + } + + /* Brick down - some other node has to migrate these files*/ + if (gf_uuid_is_null(entry->elements[index].uuid)) { + /* Fall back to the first non-null index */ + index = dht_get_first_non_null_index(entry); + + if (index == -1) { + /* None of the bricks in the subvol are up. + * CHILD_DOWN will kill the process soon */ - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - goto out; - } + return 0; + } - if (dir_dfmeta->offset_var[i].readdir_done == 1) { - ret = 0; + if (entry->elements[index].info == REBAL_NODEUUID_MINE) { + /* Index matches this node's nodeuuid.*/ + ret = 1; goto out; + } } + } +out: + return ret; +} - if (dir_dfmeta->fetch_entries[i] == 1) { - ret = syncop_readdirp (conf->local_subvols[i], fd, 131072, - dir_dfmeta->offset_var[i].offset, - &(dir_dfmeta->equeue[i]), - xattr_req, NULL); - if (ret == 0) { - dir_dfmeta->offset_var[i].readdir_done = 1; - ret = 0; - goto out; - } +int +gf_defrag_migrate_single_file(void *opaque) +{ + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + int ret = 0; + gf_dirent_t *entry = NULL; + struct timeval start = { + 0, + }; + loc_t entry_loc = { + 0, + }; + loc_t *loc = NULL; + struct iatt iatt = { + 0, + }; + dict_t *migrate_data = NULL; + struct timeval end = { + 0, + }; + double elapsed = { + 0, + }; + struct dht_container *rebal_entry = NULL; + inode_t *inode = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + call_frame_t *statfs_frame = NULL; + xlator_t *old_THIS = NULL; + data_t *tmp = NULL; + int fop_errno = 0; + gf_dht_migrate_data_type_t rebal_type = GF_DHT_MIGRATE_DATA; + char value[MAX_REBAL_TYPE_SIZE] = { + 0, + }; + struct iatt *iatt_ptr = NULL; + gf_boolean_t update_skippedcount = _gf_true; + int i = 0; + + rebal_entry = (struct dht_container *)opaque; + if (!rebal_entry) { + gf_log("DHT", GF_LOG_ERROR, "rebal_entry is NULL"); + ret = -1; + goto out; + } - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_DATA_FAILED, - "Readdirp failed. 
Aborting data migration for " - "directory: %s", loc->path); - *perrno = -ret; - ret = -1; - goto out; - } + this = rebal_entry->this; - if (list_empty (&(dir_dfmeta->equeue[i].list))) { - dir_dfmeta->offset_var[i].readdir_done = 1; - ret = 0; - goto out; - } + conf = this->private; - dir_dfmeta->fetch_entries[i] = 0; - } + defrag = conf->defrag; - while (1) { + loc = rebal_entry->parent_loc; - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - goto out; - } + migrate_data = rebal_entry->migrate_data; - df_entry = list_entry (dir_dfmeta->iterator[i]->next, - typeof (*df_entry), list); + entry = rebal_entry->df_entry; + iatt_ptr = &entry->d_stat; - if (&df_entry->list == dir_dfmeta->head[i]) { - gf_dirent_free (&(dir_dfmeta->equeue[i])); - INIT_LIST_HEAD (&(dir_dfmeta->equeue[i].list)); - dir_dfmeta->fetch_entries[i] = 1; - dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; - ret = 0; - goto out; - } + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = -1; + goto out; + } - dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next; + if (defrag->stats == _gf_true) { + gettimeofday(&start, NULL); + } - dir_dfmeta->offset_var[i].offset = df_entry->d_off; - if (!strcmp (df_entry->d_name, ".") || - !strcmp (df_entry->d_name, "..")) - continue; + if (defrag->defrag_pattern && + (gf_defrag_pattern_match(defrag, entry->d_name, + entry->d_stat.ia_size) == _gf_false)) { + gf_log(this->name, GF_LOG_ERROR, "pattern_match failed"); + goto out; + } - if (IA_ISDIR (df_entry->d_stat.ia_type)) { - defrag->size_processed += df_entry->d_stat.ia_size; - continue; - } + memset(&entry_loc, 0, sizeof(entry_loc)); - defrag->num_files_lookedup++; + ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name); + if (ret) { + LOCK(&defrag->lock); + { + defrag->total_failures += 1; + } + UNLOCK(&defrag->lock); - if (defrag->defrag_pattern && - (gf_defrag_pattern_match (defrag, df_entry->d_name, - df_entry->d_stat.ia_size) - == _gf_false)) { - defrag->size_processed += df_entry->d_stat.ia_size; - continue; - } + ret = 0; - is_linkfile = check_is_linkfile (NULL, &df_entry->d_stat, - df_entry->dict, - conf->link_xattr_name); + gf_log(this->name, GF_LOG_ERROR, "Child loc build failed"); - if (is_linkfile) { - /* No need to add linkto file to the queue for - migration. Only the actual data file need to - be checked for migration criteria. 
- */ + goto out; + } - gf_msg_debug (this->name, 0, "Skipping linkfile" - " %s on subvol: %s", df_entry->d_name, - conf->local_subvols[i]->name); - continue; - } + if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, + entry->d_stat.ia_gfid)) { + gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); + goto out; + } - /*Build Container Structure */ + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); - tmp_container = GF_CALLOC (1, sizeof(struct dht_container), - gf_dht_mt_container_t); - if (!tmp_container) { - gf_log (this->name, GF_LOG_ERROR, "Failed to allocate " - "memory for container"); - ret = -1; - goto out; - } - tmp_container->df_entry = gf_dirent_for_name (df_entry->d_name); - if (!tmp_container->df_entry) { - gf_log (this->name, GF_LOG_ERROR, "Failed to allocate " - "memory for df_entry"); - ret = -1; - goto out; - } + gf_uuid_copy(entry_loc.pargfid, loc->gfid); - tmp_container->local_subvol_index = i; + ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s lookup failed", entry_loc.path); - tmp_container->df_entry->d_stat = df_entry->d_stat; + /* Increase failure count only for remove-brick op, so that + * user is warned to check the removed-brick for any files left + * unmigrated + */ + if (conf->decommission_subvols_cnt) { + LOCK(&defrag->lock); + { + defrag->total_failures += 1; + } + UNLOCK(&defrag->lock); + } + + ret = 0; + goto out; + } - tmp_container->df_entry->d_ino = df_entry->d_ino; + iatt_ptr = &iatt; - tmp_container->df_entry->d_type = df_entry->d_type; + hashed_subvol = dht_subvol_get_hashed(this, &entry_loc); + if (!hashed_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get hashed subvol for %s", entry_loc.path); + ret = 0; + goto out; + } - tmp_container->df_entry->d_len = df_entry->d_len; + cached_subvol = dht_subvol_get_cached(this, entry_loc.inode); + if (!cached_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CACHED_SUBVOL_GET_FAILED, + "Failed to get cached subvol for %s", entry_loc.path); - tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc), - gf_dht_mt_loc_t); - if (!tmp_container->parent_loc) { - gf_log (this->name, GF_LOG_ERROR, "Failed to allocate " - "memory for loc"); - ret = -1; - goto out; - } + ret = 0; + goto out; + } + if (hashed_subvol == cached_subvol) { + ret = 0; + goto out; + } + + inode = inode_link(entry_loc.inode, entry_loc.parent, entry->d_name, &iatt); + inode_unref(entry_loc.inode); + /* use the inode returned by inode_link */ + entry_loc.inode = inode; + + old_THIS = THIS; + THIS = this; + statfs_frame = create_frame(this, this->ctx->pool); + if (!statfs_frame) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "Insufficient memory. 
Frame creation failed"); + ret = -1; + goto out; + } + + /* async statfs information for honoring min-free-disk */ + dht_get_du_info(statfs_frame, this, loc); + THIS = old_THIS; + + tmp = dict_get(migrate_data, GF_XATTR_FILE_MIGRATE_KEY); + if (tmp) { + memcpy(value, tmp->data, tmp->len); + if (strcmp(value, "force") == 0) + rebal_type = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS; + + if (conf->decommission_in_progress) + rebal_type = GF_DHT_MIGRATE_HARDLINK; + } + + ret = dht_migrate_file(this, &entry_loc, cached_subvol, hashed_subvol, + rebal_type, &fop_errno); + if (ret == 1) { + if (fop_errno == ENOSPC) { + gf_msg_debug(this->name, 0, + "migrate-data skipped for" + " %s due to space constraints", + entry_loc.path); + + /* For remove-brick case if the source is not one of the + * removed-brick, do not mark the error as failure */ + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] == cached_subvol) { + LOCK(&defrag->lock); + { + defrag->total_failures += 1; + update_skippedcount = _gf_false; + } + UNLOCK(&defrag->lock); - ret = loc_copy (tmp_container->parent_loc, loc); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "loc_copy failed"); - ret = -1; - goto out; + break; + } } + } - tmp_container->migrate_data = migrate_data; + if (update_skippedcount) { + LOCK(&defrag->lock); + { + defrag->skipped += 1; + } + UNLOCK(&defrag->lock); - tmp_container->this = this; + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED, + "File migration skipped for %s.", entry_loc.path); + } - if (df_entry->dict) - tmp_container->df_entry->dict = - dict_ref (df_entry->dict); + } else if (fop_errno == ENOTSUP) { + gf_msg_debug(this->name, 0, + "migrate-data skipped for" + " hardlink %s ", + entry_loc.path); + LOCK(&defrag->lock); + { + defrag->skipped += 1; + } + UNLOCK(&defrag->lock); - /*Build Container Structure >> END*/ + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED, + "File migration skipped for %s.", entry_loc.path); + } - ret = 0; - goto out; + ret = 0; + goto out; + } else if (ret < 0) { + if (fop_errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, "migrate-data failed for %s", + entry_loc.path); + LOCK(&defrag->lock); + { + defrag->total_failures += 1; + } + UNLOCK(&defrag->lock); } + ret = gf_defrag_handle_migrate_error(fop_errno, defrag); + + if (!ret) { + gf_msg(this->name, GF_LOG_ERROR, fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "migrate-data on %s failed:", entry_loc.path); + } else if (ret == 1) { + ret = 0; + } + + goto out; + } + + LOCK(&defrag->lock); + { + defrag->total_files += 1; + defrag->total_data += iatt.ia_size; + } + UNLOCK(&defrag->lock); + + if (defrag->stats == _gf_true) { + gettimeofday(&end, NULL); + elapsed = (end.tv_sec - start.tv_sec) * 1e6 + + (end.tv_usec - start.tv_usec); + gf_log(this->name, GF_LOG_INFO, + "Migration of " + "file:%s size:%" PRIu64 + " bytes took %.2f" + "secs and ret: %d", + entry_loc.name, iatt.ia_size, elapsed / 1e6, ret); + } + out: - if (ret == 0) { - *container = tmp_container; - } else { - if (tmp_container) { - gf_defrag_free_container (tmp_container); - } + if (statfs_frame) { + STACK_DESTROY(statfs_frame->root); + } + + if (iatt_ptr) { + LOCK(&defrag->lock); + { + defrag->size_processed += iatt_ptr->ia_size; } + UNLOCK(&defrag->lock); + } + loc_wipe(&entry_loc); - return ret; + return ret; } -int -gf_defrag_process_dir (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, - dict_t *migrate_data, int 
*perrno) +void * +gf_defrag_task(void *opaque) { - int ret = -1; - fd_t *fd = NULL; - dht_conf_t *conf = NULL; - gf_dirent_t entries; - dict_t *xattr_req = NULL; - struct timeval dir_start = {0,}; - struct timeval end = {0,}; - double elapsed = {0,}; - int local_subvols_cnt = 0; - int i = 0; - int j = 0; - struct dht_container *container = NULL; - int ldfq_count = 0; - int dfc_index = 0; - int throttle_up = 0; - struct dir_dfmeta *dir_dfmeta = NULL; - int should_commit_hash = 1; - - gf_log (this->name, GF_LOG_INFO, "migrate data called on %s", - loc->path); - gettimeofday (&dir_start, NULL); + struct list_head *q_head = NULL; + struct dht_container *iterator = NULL; + gf_defrag_info_t *defrag = NULL; + int ret = 0; + pid_t pid = GF_CLIENT_PID_DEFRAG; + + defrag = (gf_defrag_info_t *)opaque; + if (!defrag) { + gf_msg("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL"); + goto out; + } + + syncopctx_setfspid(&pid); + + q_head = &(defrag->queue[0].list); + + /* The following while loop will dequeue one entry from the defrag->queue + under lock. We will update the defrag->global_error only when there + is an error which is critical to stop the rebalance process. The stop + message will be intimated to other migrator threads by setting the + defrag->defrag_status to GF_DEFRAG_STATUS_FAILED. + + In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is + maintained so that crawler does not starve the file migration + workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that + crawler does not go far ahead in filling up the queue. + */ + + while (_gf_true) { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + pthread_cond_broadcast(&defrag->rebalance_crawler_alarm); + pthread_cond_broadcast(&defrag->parallel_migration_cond); + goto out; + } - conf = this->private; - local_subvols_cnt = conf->local_subvols_cnt; + pthread_mutex_lock(&defrag->dfq_mutex); + { + /*Throttle down: + If the reconfigured count is less than current thread + count, then the current thread will sleep */ + + /*TODO: Need to refactor the following block to work + *under defrag->lock. For now access + * defrag->current_thread_count and rthcount under + * dfq_mutex lock */ + while (!defrag->crawl_done && (defrag->recon_thread_count < + defrag->current_thread_count)) { + defrag->current_thread_count--; + gf_msg_debug("DHT", 0, + "Thread sleeping. " + "current thread count: %d", + defrag->current_thread_count); + + pthread_cond_wait(&defrag->df_wakeup_thread, + &defrag->dfq_mutex); + + defrag->current_thread_count++; + gf_msg_debug("DHT", 0, + "Thread wokeup. 
" + "current thread count: %d", + defrag->current_thread_count); + } + + if (defrag->q_entry_count) { + iterator = list_entry(q_head->next, typeof(*iterator), list); + + gf_msg_debug("DHT", 0, + "picking entry " + "%s", + iterator->df_entry->d_name); + + list_del_init(&(iterator->list)); + + defrag->q_entry_count--; + + if ((defrag->q_entry_count < MIN_MIGRATE_QUEUE_COUNT) && + defrag->wakeup_crawler) { + pthread_cond_broadcast(&defrag->rebalance_crawler_alarm); + } + pthread_mutex_unlock(&defrag->dfq_mutex); + ret = gf_defrag_migrate_single_file((void *)iterator); - if (!local_subvols_cnt) { - ret = 0; - goto out; - } + /*Critical errors: ENOTCONN and ENOSPACE*/ + if (ret) { + dht_set_global_defrag_error(defrag, ret); - fd = fd_create (loc->inode, defrag->pid); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); - ret = -1; - goto out; - } + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = syncop_opendir (this, loc, fd, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_DATA_FAILED, - "Migrate data failed: Failed to open dir %s", - loc->path); - *perrno = -ret; - ret = -1; - goto out; - } + pthread_cond_broadcast(&defrag->rebalance_crawler_alarm); - fd_bind (fd); - dir_dfmeta = GF_CALLOC (1, sizeof (*dir_dfmeta), - gf_common_mt_pointer); - if (!dir_dfmeta) { - gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); - ret = -1; - goto out; - } + pthread_cond_broadcast(&defrag->parallel_migration_cond); + goto out; + } - dir_dfmeta->head = GF_CALLOC (local_subvols_cnt, - sizeof (*(dir_dfmeta->head)), - gf_common_mt_pointer); - if (!dir_dfmeta->head) { - gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL"); - ret = -1; - goto out; - } + gf_defrag_free_container(iterator); + + continue; + } else { + /* defrag->crawl_done flag is set means crawling + file system is done and hence a list_empty when + the above flag is set indicates there are no more + entries to be added to the queue and rebalance is + finished */ + + if (!defrag->crawl_done) { + defrag->current_thread_count--; + gf_msg_debug("DHT", 0, + "Thread " + "sleeping while waiting " + "for migration entries. " + "current thread count:%d", + defrag->current_thread_count); + + pthread_cond_wait(&defrag->parallel_migration_cond, + &defrag->dfq_mutex); + } - dir_dfmeta->iterator = GF_CALLOC (local_subvols_cnt, - sizeof (*(dir_dfmeta->iterator)), - gf_common_mt_pointer); - if (!dir_dfmeta->iterator) { - gf_log (this->name, GF_LOG_ERROR, - "dir_dfmeta->iterator is NULL"); - ret = -1; - goto out; - } + if (defrag->crawl_done && !defrag->q_entry_count) { + defrag->current_thread_count++; + gf_msg_debug("DHT", 0, "Exiting thread"); - dir_dfmeta->equeue = GF_CALLOC (local_subvols_cnt, sizeof (entries), - gf_dht_mt_dirent_t); - if (!dir_dfmeta->equeue) { - gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL"); - ret = -1; - goto out; + pthread_cond_broadcast(&defrag->parallel_migration_cond); + goto unlock; + } else { + defrag->current_thread_count++; + gf_msg_debug("DHT", 0, + "Thread woke up" + " as found migrating entries. 
" + "current thread count:%d", + defrag->current_thread_count); + + pthread_mutex_unlock(&defrag->dfq_mutex); + continue; + } + } } + unlock: + pthread_mutex_unlock(&defrag->dfq_mutex); + break; + } +out: + return NULL; +} - dir_dfmeta->offset_var = GF_CALLOC (local_subvols_cnt, - sizeof (dht_dfoffset_ctx_t), - gf_dht_mt_octx_t); - if (!dir_dfmeta->offset_var) { - gf_log (this->name, GF_LOG_ERROR, - "dir_dfmeta->offset_var is NULL"); - ret = -1; - goto out; - } - ret = gf_defrag_ctx_subvols_init (dir_dfmeta->offset_var, this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "dht_dfoffset_ctx_t" - "initialization failed"); - ret = -1; - goto out; - } +int static gf_defrag_get_entry(xlator_t *this, int i, + struct dht_container **container, loc_t *loc, + dht_conf_t *conf, gf_defrag_info_t *defrag, + fd_t *fd, dict_t *migrate_data, + struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, + int *should_commit_hash, int *perrno) +{ + int ret = -1; + char is_linkfile = 0; + gf_dirent_t *df_entry = NULL; + struct dht_container *tmp_container = NULL; - dir_dfmeta->fetch_entries = GF_CALLOC (local_subvols_cnt, - sizeof (int), gf_common_mt_int); - if (!dir_dfmeta->fetch_entries) { - gf_log (this->name, GF_LOG_ERROR, - "dir_dfmeta->fetch_entries is NULL"); - ret = -1; - goto out; - } + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = -1; + goto out; + } - for (i = 0; i < local_subvols_cnt ; i++) { - INIT_LIST_HEAD (&(dir_dfmeta->equeue[i].list)); - dir_dfmeta->head[i] = &(dir_dfmeta->equeue[i].list); - dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; - dir_dfmeta->fetch_entries[i] = 1; - } + if (dir_dfmeta->offset_var[i].readdir_done == 1) { + ret = 0; + goto out; + } - xattr_req = dict_new (); - if (!xattr_req) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - ret = -1; - goto out; + if (dir_dfmeta->fetch_entries[i] == 1) { + ret = syncop_readdirp(conf->local_subvols[i], fd, 131072, + dir_dfmeta->offset_var[i].offset, + &(dir_dfmeta->equeue[i]), xattr_req, NULL); + if (ret == 0) { + dir_dfmeta->offset_var[i].readdir_done = 1; + ret = 0; + goto out; } - ret = dict_set_uint32 (xattr_req, - conf->link_xattr_name, 256); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to set dict for " - "key: %s", conf->link_xattr_name); - ret = -1; - goto out; + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_DATA_FAILED, + "Readdirp failed. Aborting data migration for " + "directory: %s", + loc->path); + *perrno = -ret; + ret = -1; + goto out; } - /* - Job: Read entries from each local subvol and store the entries - in equeue array of linked list. Now pick one entry from the - equeue array in a round robin basis and add them to defrag Queue. - */ - - while (!dht_dfreaddirp_done(dir_dfmeta->offset_var, - local_subvols_cnt)) { - - pthread_mutex_lock (&defrag->dfq_mutex); - { - - /*Throttle up: If reconfigured count is higher than - current thread count, wake up the sleeping threads - TODO: Need to refactor this. 
Instead of making the - thread sleep and wake, we should terminate and spawn - threads on-demand*/ - - if (defrag->recon_thread_count > - defrag->current_thread_count) { - throttle_up = - (defrag->recon_thread_count - - defrag->current_thread_count); - for (j = 0; j < throttle_up; j++) { - pthread_cond_signal ( - &defrag->df_wakeup_thread); - } + if (list_empty(&(dir_dfmeta->equeue[i].list))) { + dir_dfmeta->offset_var[i].readdir_done = 1; + ret = 0; + goto out; + } - } + dir_dfmeta->fetch_entries[i] = 0; + } - while (defrag->q_entry_count > - MAX_MIGRATE_QUEUE_COUNT) { - defrag->wakeup_crawler = 1; - pthread_cond_wait ( - &defrag->rebalance_crawler_alarm, - &defrag->dfq_mutex); - } + while (1) { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = -1; + goto out; + } - ldfq_count = defrag->q_entry_count; + df_entry = list_entry(dir_dfmeta->iterator[i]->next, typeof(*df_entry), + list); - if (defrag->wakeup_crawler) { - defrag->wakeup_crawler = 0; - } + if (&df_entry->list == dir_dfmeta->head[i]) { + gf_dirent_free(&(dir_dfmeta->equeue[i])); + INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list)); + dir_dfmeta->fetch_entries[i] = 1; + dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; + ret = 0; + goto out; + } - } - pthread_mutex_unlock (&defrag->dfq_mutex); + dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next; - while (ldfq_count <= MAX_MIGRATE_QUEUE_COUNT && - !dht_dfreaddirp_done(dir_dfmeta->offset_var, - local_subvols_cnt)) { + dir_dfmeta->offset_var[i].offset = df_entry->d_off; + if (!strcmp(df_entry->d_name, ".") || !strcmp(df_entry->d_name, "..")) + continue; - ret = gf_defrag_get_entry (this, dfc_index, &container, - loc, conf, defrag, fd, - migrate_data, dir_dfmeta, - xattr_req, - &should_commit_hash, perrno); + if (IA_ISDIR(df_entry->d_stat.ia_type)) { + defrag->size_processed += df_entry->d_stat.ia_size; + continue; + } - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Found " - "error from gf_defrag_get_entry"); + defrag->num_files_lookedup++; - ret = -1; - goto out; - } + if (defrag->defrag_pattern && + (gf_defrag_pattern_match(defrag, df_entry->d_name, + df_entry->d_stat.ia_size) == _gf_false)) { + defrag->size_processed += df_entry->d_stat.ia_size; + continue; + } - /* Check if we got an entry, else we need to move the - index to the next subvol */ - if (!container) { - GF_CRAWL_INDEX_MOVE(dfc_index, - local_subvols_cnt); - continue; - } + is_linkfile = check_is_linkfile(NULL, &df_entry->d_stat, df_entry->dict, + conf->link_xattr_name); - /* Q this entry in the dfq */ - pthread_mutex_lock (&defrag->dfq_mutex); - { - list_add_tail (&container->list, - &(defrag->queue[0].list)); - defrag->q_entry_count++; - ldfq_count = defrag->q_entry_count; - - gf_msg_debug (this->name, 0, "added " - "file:%s parent:%s to the queue ", - container->df_entry->d_name, - container->parent_loc->path); - - pthread_cond_signal ( - &defrag->parallel_migration_cond); - } - pthread_mutex_unlock (&defrag->dfq_mutex); + if (is_linkfile) { + /* No need to add linkto file to the queue for + migration. Only the actual data file need to + be checked for migration criteria. 
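/*
 * Editor's note: an illustrative sketch, not part of this patch. It condenses
 * the filtering that gf_defrag_get_entry() applies above before an entry is
 * queued for migration: "." and ".." are ignored, directories are left to the
 * layout crawl, linkto files are skipped (the real code detects them with
 * check_is_linkfile() via the DHT link xattr), and the optional rebalance
 * name/size pattern is honoured. The struct and helper names here are
 * invented for the example.
 */
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

struct dirent_info {
    const char *name;
    bool is_dir;
    bool is_linkfile; /* derived from the link xattr in the real code */
    unsigned long long size;
};

typedef bool (*pattern_fn)(const char *name, unsigned long long size);

/* Returns true when the entry should be handed to the migration queue. */
static bool
entry_wants_migration(const struct dirent_info *e, pattern_fn pattern_ok)
{
    if (!strcmp(e->name, ".") || !strcmp(e->name, ".."))
        return false;

    if (e->is_dir) /* directories are fixed up by the layout crawl instead */
        return false;

    if (e->is_linkfile) /* only the data file moves, never the pointer */
        return false;

    if (pattern_ok != NULL && !pattern_ok(e->name, e->size))
        return false;

    return true;
}
/* (end of editor's sketch; the patch resumes below) */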
+ */ - GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt); - } + gf_msg_debug(this->name, 0, + "Skipping linkfile" + " %s on subvol: %s", + df_entry->d_name, conf->local_subvols[i]->name); + continue; } - gettimeofday (&end, NULL); - elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + - (end.tv_usec - dir_start.tv_usec); - gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took " - "%.2f secs", loc->path, elapsed/1e6); - ret = 0; -out: + /*Build Container Structure */ - gf_defrag_free_dir_dfmeta (dir_dfmeta, local_subvols_cnt); + tmp_container = GF_CALLOC(1, sizeof(struct dht_container), + gf_dht_mt_container_t); + if (!tmp_container) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to allocate " + "memory for container"); + ret = -1; + goto out; + } + tmp_container->df_entry = gf_dirent_for_name(df_entry->d_name); + if (!tmp_container->df_entry) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to allocate " + "memory for df_entry"); + ret = -1; + goto out; + } - if (xattr_req) - dict_unref(xattr_req); + tmp_container->local_subvol_index = i; - if (fd) - fd_unref (fd); + tmp_container->df_entry->d_stat = df_entry->d_stat; - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } + tmp_container->df_entry->d_ino = df_entry->d_ino; - /* It does not matter if it errored out - this number is - * used to calculate rebalance estimated time to complete. - * No locking required as dirs are processed by a single thread. - */ - defrag->num_dirs_processed++; - return ret; -} -int -gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag, - loc_t *loc, dict_t *fix_layout) -{ - int ret; - dht_conf_t *conf = NULL; - /* - * Now we're ready to update the directory commit hash for the volume - * root, so that hash miscompares and broadcast lookups can stop. - * However, we want to skip that if fix-layout is all we did. In - * that case, we want the miscompares etc. to continue until a real - * rebalance is complete. 
- */ - if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX - || defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - return 0; - } + tmp_container->df_entry->d_type = df_entry->d_type; - conf = this->private; - if (!conf) { - /*Uh oh - */ - return -1; - } + tmp_container->df_entry->d_len = df_entry->d_len; - if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) { - /* Commit hash updates are only done on local subvolumes and - * only when lookup optimization is needed (for older client - * support) - */ - return 0; + tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc), gf_dht_mt_loc_t); + if (!tmp_container->parent_loc) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to allocate " + "memory for loc"); + ret = -1; + goto out; } - ret = dict_set_uint32 (fix_layout, "new-commit-hash", - defrag->new_commit_hash); + ret = loc_copy(tmp_container->parent_loc, loc); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set new-commit-hash"); - return -1; + gf_log(this->name, GF_LOG_ERROR, "loc_copy failed"); + ret = -1; + goto out; } - ret = syncop_setxattr (this, loc, fix_layout, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LAYOUT_FIX_FAILED, - "fix layout on %s failed", loc->path); + tmp_container->migrate_data = migrate_data; - if (-ret == ENOENT || -ret == ESTALE) { - /* Dir most likely is deleted */ - return 0; - } + tmp_container->this = this; - return -1; - } + if (df_entry->dict) + tmp_container->df_entry->dict = dict_ref(df_entry->dict); - /* TBD: find more efficient solution than adding/deleting every time */ - dict_del(fix_layout, "new-commit-hash"); + /*Build Container Structure >> END*/ - return 0; -} + ret = 0; + goto out; + } +out: + if (ret == 0) { + *container = tmp_container; + } else { + if (tmp_container) { + gf_defrag_free_container(tmp_container); + } + } + return ret; +} -/* Function for doing a named lookup on file inodes during an attach tier - * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal - * happens on pre-existing data. This is required so that the ctr database has - * hardlinks of all the exisitng file in the volume. CTR xlator on the - * brick/server side does db update/insert of the hardlink on a namelookup. - * Currently the namedlookup is done synchronous to the fixlayout that is - * triggered by attach tier. This is not performant, adding more time to - * fixlayout. 
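/*
 * Editor's note: an illustrative sketch, not part of this patch. It restates
 * the decision gf_defrag_settle_hash() makes before updating a directory's
 * commit hash: skip the update when the operation was only a fix-layout (or
 * a tier detach start), and also when there are no local subvolumes or
 * lookup optimisation is disabled, because the commit hash only matters for
 * lookup-optimized clients. The enum and struct below are simplified
 * stand-ins for the gluster types.
 */
#include <stdbool.h>

enum rebal_cmd {
    CMD_START,            /* full rebalance: settle the hash */
    CMD_START_LAYOUT_FIX, /* fix-layout only: keep broadcasting lookups */
    CMD_DETACH_START,     /* tier detach: likewise, do not settle */
};

struct rebal_ctx {
    enum rebal_cmd cmd;
    int local_subvols_cnt;
    bool lookup_optimize;
};

static bool
should_settle_commit_hash(const struct rebal_ctx *ctx)
{
    if (ctx->cmd == CMD_START_LAYOUT_FIX || ctx->cmd == CMD_DETACH_START)
        return false; /* miscompares should continue until a real rebalance */

    if (ctx->local_subvols_cnt == 0 || !ctx->lookup_optimize)
        return false; /* nothing local to update / no client benefits */

    return true; /* real code then sets "new-commit-hash" via setxattr */
}
/* (end of editor's sketch; the patch resumes below) */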
The performant approach is record the hardlinks on a compressed - * datastore and then do the namelookup asynchronously later, giving the ctr db - * eventual consistency - * */ int -gf_fix_layout_tier_attach_lookup (xlator_t *this, - loc_t *parent_loc, - gf_dirent_t *file_dentry) +gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *migrate_data, int *perrno) { - int ret = -1; - dict_t *lookup_xdata = NULL; - dht_conf_t *conf = NULL; - loc_t file_loc = {0,}; - struct iatt iatt = {0,}; - - GF_VALIDATE_OR_GOTO ("tier", this, out); - - GF_VALIDATE_OR_GOTO (this->name, parent_loc, out); - - GF_VALIDATE_OR_GOTO (this->name, file_dentry, out); - - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - if (!parent_loc->inode) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s parent is NULL", parent_loc->path, - file_dentry->d_name); - goto out; - } + int ret = -1; + fd_t *fd = NULL; + dht_conf_t *conf = NULL; + gf_dirent_t entries; + dict_t *xattr_req = NULL; + struct timeval dir_start = { + 0, + }; + struct timeval end = { + 0, + }; + double elapsed = { + 0, + }; + int local_subvols_cnt = 0; + int i = 0; + int j = 0; + struct dht_container *container = NULL; + int ldfq_count = 0; + int dfc_index = 0; + int throttle_up = 0; + struct dir_dfmeta *dir_dfmeta = NULL; + int should_commit_hash = 1; + + gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); + gettimeofday(&dir_start, NULL); + + conf = this->private; + local_subvols_cnt = conf->local_subvols_cnt; + + if (!local_subvols_cnt) { + ret = 0; + goto out; + } + fd = fd_create(loc->inode, defrag->pid); + if (!fd) { + gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + ret = -1; + goto out; + } + + ret = syncop_opendir(this, loc, fd, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED, + "Migrate data failed: Failed to open dir %s", loc->path); + *perrno = -ret; + ret = -1; + goto out; + } - conf = this->private; + fd_bind(fd); + dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); + if (!dir_dfmeta) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); + ret = -1; + goto out; + } - loc_wipe (&file_loc); + dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)), + gf_common_mt_pointer); + if (!dir_dfmeta->head) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL"); + ret = -1; + goto out; + } + + dir_dfmeta->iterator = GF_CALLOC(local_subvols_cnt, + sizeof(*(dir_dfmeta->iterator)), + gf_common_mt_pointer); + if (!dir_dfmeta->iterator) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->iterator is NULL"); + ret = -1; + goto out; + } - if (gf_uuid_is_null (file_dentry->d_stat.ia_gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s gfid not present", parent_loc->path, - file_dentry->d_name); - goto out; - } + dir_dfmeta->equeue = GF_CALLOC(local_subvols_cnt, sizeof(entries), + gf_dht_mt_dirent_t); + if (!dir_dfmeta->equeue) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL"); + ret = -1; + goto out; + } - gf_uuid_copy (file_loc.gfid, file_dentry->d_stat.ia_gfid); + dir_dfmeta->offset_var = GF_CALLOC( + local_subvols_cnt, sizeof(dht_dfoffset_ctx_t), gf_dht_mt_octx_t); + if (!dir_dfmeta->offset_var) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->offset_var is NULL"); + ret = -1; + goto out; + } + ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + 
"dht_dfoffset_ctx_t" + "initialization failed"); + ret = -1; + goto out; + } - if (gf_uuid_is_null (parent_loc->gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s" - " gfid not present", parent_loc->path, - file_dentry->d_name); - goto out; - } + dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int), + gf_common_mt_int); + if (!dir_dfmeta->fetch_entries) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL"); + ret = -1; + goto out; + } + + for (i = 0; i < local_subvols_cnt; i++) { + INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list)); + dir_dfmeta->head[i] = &(dir_dfmeta->equeue[i].list); + dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; + dir_dfmeta->fetch_entries[i] = 1; + } + + xattr_req = dict_new(); + if (!xattr_req) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set dict for " + "key: %s", + conf->link_xattr_name); + ret = -1; + goto out; + } - gf_uuid_copy (file_loc.pargfid, parent_loc->gfid); + /* + Job: Read entries from each local subvol and store the entries + in equeue array of linked list. Now pick one entry from the + equeue array in a round robin basis and add them to defrag Queue. + */ + while (!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) { + pthread_mutex_lock(&defrag->dfq_mutex); + { + /*Throttle up: If reconfigured count is higher than + current thread count, wake up the sleeping threads + TODO: Need to refactor this. Instead of making the + thread sleep and wake, we should terminate and spawn + threads on-demand*/ + + if (defrag->recon_thread_count > defrag->current_thread_count) { + throttle_up = (defrag->recon_thread_count - + defrag->current_thread_count); + for (j = 0; j < throttle_up; j++) { + pthread_cond_signal(&defrag->df_wakeup_thread); + } + } - ret = dht_build_child_loc (this, &file_loc, parent_loc, - file_dentry->d_name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Child loc build failed"); - ret = -1; - goto out; - } + while (defrag->q_entry_count > MAX_MIGRATE_QUEUE_COUNT) { + defrag->wakeup_crawler = 1; + pthread_cond_wait(&defrag->rebalance_crawler_alarm, + &defrag->dfq_mutex); + } - lookup_xdata = dict_new (); - if (!lookup_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed creating lookup dict for %s", - file_dentry->d_name); - goto out; - } + ldfq_count = defrag->q_entry_count; - ret = dict_set_int32 (lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set lookup flag"); - goto out; + if (defrag->wakeup_crawler) { + defrag->wakeup_crawler = 0; + } } + pthread_mutex_unlock(&defrag->dfq_mutex); - gf_uuid_copy (file_loc.parent->gfid, parent_loc->gfid); - - /* Sending lookup to cold tier only */ - ret = syncop_lookup (conf->subvolumes[0], &file_loc, &iatt, - NULL, lookup_xdata, NULL); - if (ret) { - /* If the file does not exist on the cold tier than it must */ - /* have been discovered on the hot tier. This is not an error. 
*/ - gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s lookup to cold tier on attach heal failed", file_loc.path); - goto out; - } + while ( + ldfq_count <= MAX_MIGRATE_QUEUE_COUNT && + !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) { + ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, + defrag, fd, migrate_data, dir_dfmeta, + xattr_req, &should_commit_hash, perrno); - ret = 0; + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Found " + "error from gf_defrag_get_entry"); + ret = -1; + goto out; + } + + /* Check if we got an entry, else we need to move the + index to the next subvol */ + if (!container) { + GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt); + continue; + } + + /* Q this entry in the dfq */ + pthread_mutex_lock(&defrag->dfq_mutex); + { + list_add_tail(&container->list, &(defrag->queue[0].list)); + defrag->q_entry_count++; + ldfq_count = defrag->q_entry_count; + + gf_msg_debug(this->name, 0, + "added " + "file:%s parent:%s to the queue ", + container->df_entry->d_name, + container->parent_loc->path); + + pthread_cond_signal(&defrag->parallel_migration_cond); + } + pthread_mutex_unlock(&defrag->dfq_mutex); + + GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt); + } + } + + gettimeofday(&end, NULL); + elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + + (end.tv_usec - dir_start.tv_usec); + gf_log(this->name, GF_LOG_INFO, + "Migration operation on dir %s took " + "%.2f secs", + loc->path, elapsed / 1e6); + ret = 0; out: - loc_wipe (&file_loc); + gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt); - if (lookup_xdata) - dict_unref (lookup_xdata); + if (xattr_req) + dict_unref(xattr_req); - return ret; -} + if (fd) + fd_unref(fd); + + if (ret == 0 && should_commit_hash == 0) { + ret = 2; + } + /* It does not matter if it errored out - this number is + * used to calculate rebalance estimated time to complete. + * No locking required as dirs are processed by a single thread. + */ + defrag->num_dirs_processed++; + return ret; +} int -gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, - dict_t *fix_layout, dict_t *migrate_data) +gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *fix_layout) { - int ret = -1; - loc_t entry_loc = {0,}; - fd_t *fd = NULL; - gf_dirent_t entries; - gf_dirent_t *tmp = NULL; - gf_dirent_t *entry = NULL; - gf_boolean_t free_entries = _gf_false; - off_t offset = 0; - struct iatt iatt = {0,}; - inode_t *linked_inode = NULL, *inode = NULL; - dht_conf_t *conf = NULL; - int should_commit_hash = 1; - int perrno = 0; + int ret; + dht_conf_t *conf = NULL; + /* + * Now we're ready to update the directory commit hash for the volume + * root, so that hash miscompares and broadcast lookups can stop. + * However, we want to skip that if fix-layout is all we did. In + * that case, we want the miscompares etc. to continue until a real + * rebalance is complete. 
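/*
 * Editor's note: an illustrative sketch, not part of this patch. It shows the
 * producer (crawler) side of the queue described in the earlier sketch: if
 * the configured worker count was raised, sleeping workers are signalled
 * awake ("throttle up"); if the queue has grown past the high watermark, the
 * crawler blocks until a worker drains it; otherwise it appends the next
 * entry and signals a worker. mig_queue2/mig_item2 are invented stand-ins (a
 * variant of the earlier invented types), not GlusterFS APIs.
 */
#include <pthread.h>

#define HIGH_WATERMARK 2000 /* plays the role of MAX_MIGRATE_QUEUE_COUNT */

struct mig_item2 {
    struct mig_item2 *next;
};

struct mig_queue2 {
    pthread_mutex_t lock;
    pthread_cond_t crawler_alarm; /* producer waits here when queue is full */
    pthread_cond_t worker_alarm;  /* consumers wait here when queue is empty */
    pthread_cond_t thread_wakeup; /* throttled-down workers wait here */
    struct mig_item2 *head;
    struct mig_item2 *tail;
    int count;
    int configured_workers;
    int active_workers;
};

static void
enqueue_for_migration(struct mig_queue2 *q, struct mig_item2 *item)
{
    pthread_mutex_lock(&q->lock);

    /* Throttle up: wake one sleeping worker per missing active thread. */
    for (int i = q->active_workers; i < q->configured_workers; i++)
        pthread_cond_signal(&q->thread_wakeup);

    /* High watermark: do not run far ahead of the migrators. */
    while (q->count > HIGH_WATERMARK)
        pthread_cond_wait(&q->crawler_alarm, &q->lock);

    item->next = NULL;
    if (q->tail)
        q->tail->next = item;
    else
        q->head = item;
    q->tail = item;
    q->count++;

    pthread_cond_signal(&q->worker_alarm);
    pthread_mutex_unlock(&q->lock);
}
/* (end of editor's sketch; the patch resumes below) */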
+ */ + if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX || + defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || + defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { + return 0; + } - conf = this->private; - if (!conf) { - ret = -1; - goto out; - } + conf = this->private; + if (!conf) { + /*Uh oh + */ + return -1; + } - ret = syncop_lookup (this, loc, &iatt, NULL, NULL, NULL); - if (ret) { - if (strcmp (loc->path, "/") == 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "lookup failed for:%s", loc->path); + if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) { + /* Commit hash updates are only done on local subvolumes and + * only when lookup optimization is needed (for older client + * support) + */ + return 0; + } - defrag->total_failures++; - ret = -1; - goto out; - } + ret = dict_set_uint32(fix_layout, "new-commit-hash", + defrag->new_commit_hash); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to set new-commit-hash"); + return -1; + } - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "Dir:%s renamed or removed. Skipping", - loc->path); - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - goto out; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "lookup failed for:%s", loc->path); + ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "fix layout on %s failed", loc->path); - defrag->total_failures++; - goto out; - } + if (-ret == ENOENT || -ret == ESTALE) { + /* Dir most likely is deleted */ + return 0; } - fd = fd_create (loc->inode, defrag->pid); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); - ret = -1; - goto out; - } + return -1; + } - ret = syncop_opendir (this, loc, fd, NULL, NULL); - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - goto out; - } + /* TBD: find more efficient solution than adding/deleting every time */ + dict_del(fix_layout, "new-commit-hash"); - gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s, " - "err:%d", loc->path, -ret); + return 0; +} - ret = -1; - goto out; - } +/* Function for doing a named lookup on file inodes during an attach tier + * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal + * happens on pre-existing data. This is required so that the ctr database has + * hardlinks of all the exisitng file in the volume. CTR xlator on the + * brick/server side does db update/insert of the hardlink on a namelookup. + * Currently the namedlookup is done synchronous to the fixlayout that is + * triggered by attach tier. This is not performant, adding more time to + * fixlayout. 
The performant approach is record the hardlinks on a compressed + * datastore and then do the namelookup asynchronously later, giving the ctr db + * eventual consistency + * */ +int +gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc, + gf_dirent_t *file_dentry) +{ + int ret = -1; + dict_t *lookup_xdata = NULL; + dht_conf_t *conf = NULL; + loc_t file_loc = { + 0, + }; + struct iatt iatt = { + 0, + }; - fd_bind (fd); - INIT_LIST_HEAD (&entries.list); + GF_VALIDATE_OR_GOTO("tier", this, out); - while ((ret = syncop_readdirp (this, fd, 131072, offset, &entries, - NULL, NULL)) != 0) - { - if (ret < 0) { - if (-ret == ENOENT || -ret == ESTALE) { - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - goto out; - } + GF_VALIDATE_OR_GOTO(this->name, parent_loc, out); - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_READDIR_ERROR, "readdirp failed for " - "path %s. Aborting fix-layout", loc->path); + GF_VALIDATE_OR_GOTO(this->name, file_dentry, out); - ret = -1; - goto out; - } + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - if (list_empty (&entries.list)) - break; + if (!parent_loc->inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name); + goto out; + } - free_entries = _gf_true; + conf = this->private; - list_for_each_entry_safe (entry, tmp, &entries.list, list) { - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = 1; - goto out; - } + loc_wipe(&file_loc); - offset = entry->d_off; - - if (!strcmp (entry->d_name, ".") || - !strcmp (entry->d_name, "..")) - continue; - if (!IA_ISDIR (entry->d_stat.ia_type)) { - - /* If its a fix layout during the attach - * tier operation do lookups on files - * on cold subvolume so that there is a - * CTR DB Lookup Heal triggered on existing - * data. 
- * */ - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - gf_fix_layout_tier_attach_lookup - (this, loc, entry); - } - - continue; - } - loc_wipe (&entry_loc); + if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "%s/%s gfid not present", parent_loc->path, file_dentry->d_name); + goto out; + } - ret = dht_build_child_loc (this, &entry_loc, loc, - entry->d_name); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Child loc" - " build failed for entry: %s", - entry->d_name); + gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid); - if (conf->decommission_in_progress) { - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; + if (gf_uuid_is_null(parent_loc->gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "%s/%s" + " gfid not present", + parent_loc->path, file_dentry->d_name); + goto out; + } - goto out; - } else { - should_commit_hash = 0; + gf_uuid_copy(file_loc.pargfid, parent_loc->gfid); - continue; - } - } + ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Child loc build failed"); + ret = -1; + goto out; + } + + lookup_xdata = dict_new(); + if (!lookup_xdata) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed creating lookup dict for %s", file_dentry->d_name); + goto out; + } + + ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to set lookup flag"); + goto out; + } + + gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid); + + /* Sending lookup to cold tier only */ + ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL, + lookup_xdata, NULL); + if (ret) { + /* If the file does not exist on the cold tier than it must */ + /* have been discovered on the hot tier. This is not an error. 
*/ + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "%s lookup to cold tier on attach heal failed", file_loc.path); + goto out; + } + + ret = 0; - if (gf_uuid_is_null (entry->d_stat.ia_gfid)) { - gf_log (this->name, GF_LOG_ERROR, "%s/%s" - " gfid not present", loc->path, - entry->d_name); - continue; - } +out: + loc_wipe(&file_loc); - gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); + if (lookup_xdata) + dict_unref(lookup_xdata); - /*In case the gfid stored in the inode by inode_link - * and the gfid obtained in the lookup differs, then - * client3_3_lookup_cbk will return ESTALE and proper - * error will be captured - */ + return ret; +} - linked_inode = inode_link (entry_loc.inode, loc->inode, - entry->d_name, - &entry->d_stat); +int +gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *fix_layout, dict_t *migrate_data) +{ + int ret = -1; + loc_t entry_loc = { + 0, + }; + fd_t *fd = NULL; + gf_dirent_t entries; + gf_dirent_t *tmp = NULL; + gf_dirent_t *entry = NULL; + gf_boolean_t free_entries = _gf_false; + off_t offset = 0; + struct iatt iatt = { + 0, + }; + inode_t *linked_inode = NULL, *inode = NULL; + dht_conf_t *conf = NULL; + int should_commit_hash = 1; + int perrno = 0; + + conf = this->private; + if (!conf) { + ret = -1; + goto out; + } - inode = entry_loc.inode; - entry_loc.inode = linked_inode; - inode_unref (inode); + ret = syncop_lookup(this, loc, &iatt, NULL, NULL, NULL); + if (ret) { + if (strcmp(loc->path, "/") == 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED, + "lookup failed for:%s", loc->path); - if (gf_uuid_is_null (loc->gfid)) { - gf_log (this->name, GF_LOG_ERROR, "%s/%s" - " gfid not present", loc->path, - entry->d_name); - continue; - } + defrag->total_failures++; + ret = -1; + goto out; + } - gf_uuid_copy (entry_loc.pargfid, loc->gfid); - - ret = syncop_lookup (this, &entry_loc, &iatt, NULL, - NULL, NULL); - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "Dir:%s renamed or removed. " - "Skipping", loc->path); - ret = 0; - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - continue; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "lookup failed for:%s", - entry_loc.path); - - defrag->total_failures++; - - if (conf->decommission_in_progress) { - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } else { - should_commit_hash = 0; - continue; - } - } - } + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. Skipping", loc->path); + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + ret = 0; + goto out; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED, + "lookup failed for:%s", loc->path); + + defrag->total_failures++; + goto out; + } + } - /* A return value of 2 means, either process_dir or - * lookup of a dir failed. 
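/*
 * Editor's note: an illustrative sketch, not part of this patch. A pattern
 * repeated throughout gf_defrag_fix_layout() above: when a lookup, opendir or
 * readdirp on a directory fails with ENOENT or ESTALE, the directory was
 * renamed or deleted while the crawl was running, so the crawler skips it
 * (only counting a failure when a decommission is in progress) instead of
 * aborting the whole rebalance. The helper below just names that decision;
 * the enum values are invented for the example.
 */
#include <errno.h>

enum crawl_verdict {
    CRAWL_CONTINUE, /* transient: the directory vanished under us, skip it */
    CRAWL_ABORT,    /* anything else is a real error for this directory */
};

static enum crawl_verdict
classify_dir_error(int op_errno)
{
    if (op_errno == ENOENT || op_errno == ESTALE)
        return CRAWL_CONTINUE;

    return CRAWL_ABORT;
}
/* (end of editor's sketch; the patch resumes below) */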
Hence, don't commit hash - * for the current directory*/ + fd = fd_create(loc->inode, defrag->pid); + if (!fd) { + gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + ret = -1; + goto out; + } - ret = gf_defrag_fix_layout (this, defrag, &entry_loc, - fix_layout, migrate_data); + ret = syncop_opendir(this, loc, fd, NULL, NULL); + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + ret = 0; + goto out; + } - if (ret && ret != 2) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LAYOUT_FIX_FAILED, - "Fix layout failed for %s", - entry_loc.path); + gf_log(this->name, GF_LOG_ERROR, + "Failed to open dir %s, " + "err:%d", + loc->path, -ret); - defrag->total_failures++; + ret = -1; + goto out; + } - if (conf->decommission_in_progress) { - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; + fd_bind(fd); + INIT_LIST_HEAD(&entries.list); - goto out; - } else { - /* Let's not commit-hash if - * gf_defrag_fix_layout failed*/ - continue; - } - } + while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, NULL, + NULL)) != 0) { + if (ret < 0) { + if (-ret == ENOENT || -ret == ESTALE) { + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; } + ret = 0; + goto out; + } - gf_dirent_free (&entries); - free_entries = _gf_false; - INIT_LIST_HEAD (&entries.list); + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_READDIR_ERROR, + "readdirp failed for " + "path %s. Aborting fix-layout", + loc->path); + + ret = -1; + goto out; } - ret = syncop_setxattr (this, loc, fix_layout, 0, NULL, NULL); - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_LAYOUT_FIX_FAILED, - "Setxattr failed. Dir %s " - "renamed or removed", - loc->path); - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LAYOUT_FIX_FAILED, - "Setxattr failed for %s", - loc->path); + if (list_empty(&entries.list)) + break; - defrag->total_failures++; + free_entries = _gf_true; - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } - } - } + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = 1; + goto out; + } + + offset = entry->d_off; + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + if (!IA_ISDIR(entry->d_stat.ia_type)) { + /* If its a fix layout during the attach + * tier operation do lookups on files + * on cold subvolume so that there is a + * CTR DB Lookup Heal triggered on existing + * data. 
+ * */ + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + gf_fix_layout_tier_attach_lookup(this, loc, entry); + } - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { - ret = gf_defrag_process_dir (this, defrag, loc, migrate_data, - &perrno); + continue; + } + loc_wipe(&entry_loc); - if (ret && (ret != 2)) { - if (perrno == ENOENT || perrno == ESTALE) { - ret = 0; - goto out; - } else { + ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Child loc" + " build failed for entry: %s", + entry->d_name); + + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - defrag->total_failures++; + goto out; + } else { + should_commit_hash = 0; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, - "gf_defrag_process_dir failed for " - "directory: %s", loc->path); + continue; + } + } + + if (gf_uuid_is_null(entry->d_stat.ia_gfid)) { + gf_log(this->name, GF_LOG_ERROR, + "%s/%s" + " gfid not present", + loc->path, entry->d_name); + continue; + } + + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); + + /*In case the gfid stored in the inode by inode_link + * and the gfid obtained in the lookup differs, then + * client3_3_lookup_cbk will return ESTALE and proper + * error will be captured + */ + + linked_inode = inode_link(entry_loc.inode, loc->inode, + entry->d_name, &entry->d_stat); + + inode = entry_loc.inode; + entry_loc.inode = linked_inode; + inode_unref(inode); + + if (gf_uuid_is_null(loc->gfid)) { + gf_log(this->name, GF_LOG_ERROR, + "%s/%s" + " gfid not present", + loc->path, entry->d_name); + continue; + } + + gf_uuid_copy(entry_loc.pargfid, loc->gfid); + + ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg(this->name, GF_LOG_INFO, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. " + "Skipping", + loc->path); + ret = 0; + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + continue; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s", + entry_loc.path); - if (conf->decommission_in_progress) { - goto out; - } + defrag->total_failures++; - should_commit_hash = 0; - } - } else if (ret == 2) { + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + ret = -1; + goto out; + } else { should_commit_hash = 0; + continue; + } } - } + } + + /* A return value of 2 means, either process_dir or + * lookup of a dir failed. 
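/*
 * Editor's note: an illustrative sketch, not part of this patch. It captures
 * the return-value convention gf_defrag_fix_layout() uses above while
 * recursing: 0 means the subtree is clean, a negative value is a hard
 * failure, and 2 means "something under here failed or was skipped, so do
 * not settle (commit) the hash for this directory". The sketch walks a local
 * directory tree with plain POSIX calls purely to show the control flow; the
 * per-directory layout fix, file migration and hash settling are left as
 * comments, and the function name is invented.
 */
#include <dirent.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>

static int
fix_layout_walk(const char *path)
{
    DIR *dir = NULL;
    struct dirent *entry = NULL;
    struct stat st;
    char child[PATH_MAX];
    int should_commit = 1;
    int ret = 0;

    dir = opendir(path);
    if (!dir)
        return (errno == ENOENT || errno == ESTALE) ? 0 : -1;

    while ((entry = readdir(dir)) != NULL) {
        if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
            continue;

        snprintf(child, sizeof(child), "%s/%s", path, entry->d_name);
        if (lstat(child, &st) != 0 || !S_ISDIR(st.st_mode))
            continue; /* files are handled by the migration queue */

        ret = fix_layout_walk(child); /* depth-first, like the patch */
        if (ret < 0) {
            closedir(dir);
            return ret;
        }
        if (ret == 2)
            should_commit = 0; /* child failed: don't settle our own hash */
    }
    closedir(dir);

    /* After the children: fix this directory's own layout (setxattr in the
     * real code), migrate its files via the queue, and settle the commit
     * hash only if nothing underneath reported a failure. */

    return should_commit ? 0 : 2;
}
/* (end of editor's sketch; the patch resumes below) */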
Hence, don't commit hash + * for the current directory*/ - gf_msg_trace (this->name, 0, "fix layout called on %s", loc->path); + ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, + migrate_data); - if (should_commit_hash && - gf_defrag_settle_hash (this, defrag, loc, fix_layout) != 0) { + if (ret && ret != 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, + "Fix layout failed for %s", entry_loc.path); defrag->total_failures++; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SETTLE_HASH_FAILED, - "Settle hash failed for %s", - loc->path); + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + + goto out; + } else { + /* Let's not commit-hash if + * gf_defrag_fix_layout failed*/ + continue; + } + } + } + + gf_dirent_free(&entries); + free_entries = _gf_false; + INIT_LIST_HEAD(&entries.list); + } + + ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed. Dir %s " + "renamed or removed", + loc->path); + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + ret = 0; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed for %s", loc->path); + defrag->total_failures++; + + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ret = -1; + goto out; + } + } + } + + if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && + (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); + + if (ret && (ret != 2)) { + if (perrno == ENOENT || perrno == ESTALE) { + ret = 0; + goto out; + } else { + defrag->total_failures++; + + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, + "gf_defrag_process_dir failed for " + "directory: %s", + loc->path); if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto out; + goto out; } + + should_commit_hash = 0; + } + } else if (ret == 2) { + should_commit_hash = 0; } + } - ret = 0; -out: - if (free_entries) - gf_dirent_free (&entries); + gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); + + if (should_commit_hash && + gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { + defrag->total_failures++; - loc_wipe (&entry_loc); + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, + "Settle hash failed for %s", loc->path); - if (fd) - fd_unref (fd); + ret = -1; - if (ret == 0 && should_commit_hash == 0) { - ret = 2; + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + goto out; } + } - return ret; + ret = 0; +out: + if (free_entries) + gf_dirent_free(&entries); -} + loc_wipe(&entry_loc); + if (fd) + fd_unref(fd); + if (ret == 0 && should_commit_hash == 0) { + ret = 2; + } + + return ret; +} /****************************************************************************** * Tier background Fix layout functions ******************************************************************************/ /* This is the background tier fixlayout thread */ void * -gf_tier_do_fix_layout (void *args) +gf_tier_do_fix_layout(void *args) { - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args; - int ret = -1; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - dict_t *dict = NULL; - loc_t loc = {0,}; - struct iatt iatt = {0,}; - struct iatt parent = {0,}; - - 
GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg, out); - GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg->this, out); - this = tier_fix_layout_arg->this; - - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO (this->name, defrag, out); - GF_VALIDATE_OR_GOTO (this->name, defrag->root_inode, out); - - GF_VALIDATE_OR_GOTO (this->name, tier_fix_layout_arg->fix_layout, out); - - - /* Get Root loc_t */ - dht_build_root_loc (defrag->root_inode, &loc); - ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_REBALANCE_START_FAILED, - "Lookup on root failed."); - ret = -1; - goto out; - } - - - /* Start the crawl */ - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, "Tiering Fixlayout started"); - - ret = gf_defrag_fix_layout (this, defrag, &loc, - tier_fix_layout_arg->fix_layout, NULL); - if (ret && ret != 2) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, - "Tiering fixlayout failed."); - ret = -1; - goto out; - } - - if (ret != 2 && gf_defrag_settle_hash - (this, defrag, &loc, - tier_fix_layout_arg->fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } - - dict = dict_new (); - if (!dict) { - ret = -1; - goto out; - } + gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args; + int ret = -1; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + dict_t *dict = NULL; + loc_t loc = { + 0, + }; + struct iatt iatt = { + 0, + }; + struct iatt parent = { + 0, + }; + + GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out); + GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out); + this = tier_fix_layout_arg->this; + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO(this->name, defrag, out); + GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out); + + GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out); + + /* Get Root loc_t */ + dht_build_root_loc(defrag->root_inode, &loc); + ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED, + "Lookup on root failed."); + ret = -1; + goto out; + } + + /* Start the crawl */ + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Tiering Fixlayout started"); + + ret = gf_defrag_fix_layout(this, defrag, &loc, + tier_fix_layout_arg->fix_layout, NULL); + if (ret && ret != 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, + "Tiering fixlayout failed."); + ret = -1; + goto out; + } - ret = dict_set_str (dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_REBALANCE_FAILED, - "Failed to set dictionary value: key = %s", - GF_XATTR_TIER_LAYOUT_FIXED_KEY); - ret = -1; - goto out; - } + if (ret != 2 && + gf_defrag_settle_hash(this, defrag, &loc, + tier_fix_layout_arg->fix_layout) != 0) { + defrag->total_failures++; + ret = -1; + goto out; + } - /* Marking the completion of tiering fix layout via a xattr on root */ - ret = syncop_setxattr (this, &loc, dict, 0, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to set tiering fix " - "layout completed xattr on %s", loc.path); - ret = -1; - goto out; - } + dict = dict_new(); + if (!dict) { + ret = -1; + goto out; + } + + ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes"); + if (ret) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, + "Failed to set dictionary value: key = %s", + GF_XATTR_TIER_LAYOUT_FIXED_KEY); + ret = -1; + goto out; + } + + /* Marking the completion of tiering fix layout via a xattr on root */ + ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set tiering fix " + "layout completed xattr on %s", + loc.path); + ret = -1; + goto out; + } - ret = 0; + ret = 0; out: - if (ret && defrag) - defrag->total_failures++; + if (ret && defrag) + defrag->total_failures++; - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return NULL; + return NULL; } int -gf_tier_start_fix_layout (xlator_t *this, - loc_t *loc, - gf_defrag_info_t *defrag, +gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag, dict_t *fix_layout) { - int ret = -1; - dict_t *tier_dict = NULL; - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL; - - tier_dict = dict_new (); - if (!tier_dict) { - gf_log ("tier", GF_LOG_ERROR, "Tier fix layout failed :" - "Creation of tier_dict failed"); - ret = -1; - goto out; - } - - /* Check if layout is fixed already */ - ret = syncop_getxattr (this, loc, &tier_dict, - GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret != 0) { - - tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg; - - /*Fill crawl arguments */ - tier_fix_layout_arg->this = this; - tier_fix_layout_arg->fix_layout = fix_layout; - - /* Spawn the fix layout thread so that its done in the - * background */ - ret = gf_thread_create (&tier_fix_layout_arg->thread_id, NULL, - gf_tier_do_fix_layout, - tier_fix_layout_arg, "tierfixl"); - if (ret) { - gf_log ("tier", GF_LOG_ERROR, "Thread creation failed. " - "Background fix layout for tiering will not " - "work."); - defrag->total_failures++; - goto out; - } - } - ret = 0; + int ret = -1; + dict_t *tier_dict = NULL; + gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL; + + tier_dict = dict_new(); + if (!tier_dict) { + gf_log("tier", GF_LOG_ERROR, + "Tier fix layout failed :" + "Creation of tier_dict failed"); + ret = -1; + goto out; + } + + /* Check if layout is fixed already */ + ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, + NULL, NULL); + if (ret != 0) { + tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg; + + /*Fill crawl arguments */ + tier_fix_layout_arg->this = this; + tier_fix_layout_arg->fix_layout = fix_layout; + + /* Spawn the fix layout thread so that its done in the + * background */ + ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL, + gf_tier_do_fix_layout, tier_fix_layout_arg, + "tierfixl"); + if (ret) { + gf_log("tier", GF_LOG_ERROR, + "Thread creation failed. " + "Background fix layout for tiering will not " + "work."); + defrag->total_failures++; + goto out; + } + } + ret = 0; out: - if (tier_dict) - dict_unref (tier_dict); + if (tier_dict) + dict_unref(tier_dict); - return ret; + return ret; } void -gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) +gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) { - int ret = -1; - dict_t *dict = NULL; - - GF_VALIDATE_OR_GOTO ("tier", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, defrag, out); - - /* Check if background fixlayout is completed. 
This is not - * multi-process safe i.e there is a possibility that by the time - * we move to remove the xattr there it might have been cleared by some - * other detach process from other node. We ignore the error if such - * a thing happens */ - ret = syncop_getxattr (this, loc, &dict, - GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, NULL); - if (ret) { - /* Background fixlayout not complete - nothing to clear*/ - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_LOG_TIER_STATUS, - "Unable to retrieve fixlayout xattr." - "Assume background fix layout not complete"); - goto out; - } - - ret = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_LOG_TIER_STATUS, - "Failed removing tier fix layout " - "xattr from %s", loc->path); - goto out; - } - ret = 0; + int ret = -1; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("tier", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, defrag, out); + + /* Check if background fixlayout is completed. This is not + * multi-process safe i.e there is a possibility that by the time + * we move to remove the xattr there it might have been cleared by some + * other detach process from other node. We ignore the error if such + * a thing happens */ + ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, + NULL, NULL); + if (ret) { + /* Background fixlayout not complete - nothing to clear*/ + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, + "Unable to retrieve fixlayout xattr." + "Assume background fix layout not complete"); + goto out; + } + + ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, + "Failed removing tier fix layout " + "xattr from %s", + loc->path); + goto out; + } + ret = 0; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); } void -gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) { - if (defrag->tier_conf.tier_fix_layout_arg.thread_id) { - pthread_join (defrag->tier_conf.tier_fix_layout_arg.thread_id, - NULL); - } +gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag) +{ + if (defrag->tier_conf.tier_fix_layout_arg.thread_id) { + pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL); + } } /******************Tier background Fix layout functions END********************/ int -dht_init_local_subvols_and_nodeuuids (xlator_t *this, dht_conf_t *conf, - loc_t *loc) +dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf, + loc_t *loc) { - - dict_t *dict = NULL; - gf_defrag_info_t *defrag = NULL; - uuid_t *uuid_ptr = NULL; - int ret = -1; - int i = 0; - int j = 0; - - defrag = conf->defrag; - - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { - /* Find local subvolumes */ - ret = syncop_getxattr (this, loc, &dict, - GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret && (ret != -ENODATA)) { - gf_msg (this->name, GF_LOG_ERROR, -ret, 0, "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - - if (!ret) - goto out; + dict_t *dict = NULL; + gf_defrag_info_t *defrag = NULL; + uuid_t *uuid_ptr = NULL; + int ret = -1; + int i = 0; + int j = 0; + + defrag = conf->defrag; + + if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { + /* Find local subvolumes */ + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, + NULL, NULL); + if (ret && (ret != -ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, 
-ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; } - ret = syncop_getxattr (this, loc, &dict, - GF_REBAL_OLD_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, 0, "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - ret = 0; + if (!ret) + goto out; + } + + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL, + NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; + } + ret = 0; out: - if (ret) { - return ret; - } + if (ret) { + return ret; + } - for (i = 0 ; i < conf->local_subvols_cnt; i++) { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: " - "%s", conf->local_subvols[i]->name); + for (i = 0; i < conf->local_subvols_cnt; i++) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "local subvol: " + "%s", + conf->local_subvols[i]->name); - for (j = 0; j < conf->local_nodeuuids[i].count; j++) { - uuid_ptr = &(conf->local_nodeuuids[i].elements[j].uuid); - gf_msg (this->name, GF_LOG_INFO, 0, 0, - "node uuid : %s", - uuid_utoa(*uuid_ptr)); - } + for (j = 0; j < conf->local_nodeuuids[i].count; j++) { + uuid_ptr = &(conf->local_nodeuuids[i].elements[j].uuid); + gf_msg(this->name, GF_LOG_INFO, 0, 0, "node uuid : %s", + uuid_utoa(*uuid_ptr)); } + } - return ret; + return ret; } - /* Functions for the rebalance estimates feature */ uint64_t -gf_defrag_subvol_file_size (xlator_t *this, loc_t *root_loc) -{ - int ret = -1; - struct statvfs buf = {0,}; - - if (!this) - return 0; - - ret = syncop_statfs (this, root_loc, &buf, NULL, NULL); - if (ret) { - /* Aargh! */ - return 0; - } - return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize); -} - -uint64_t -gf_defrag_total_file_size (xlator_t *this, loc_t *root_loc) -{ - dht_conf_t *conf = NULL; - int i = 0; - uint64_t size_files = 0; - uint64_t total_size = 0; - - conf = this->private; - if (!conf) { - return 0; - } - - for (i = 0 ; i < conf->local_subvols_cnt; i++) { - size_files = gf_defrag_subvol_file_size (conf->local_subvols[i], - root_loc); - total_size += size_files; - gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s," - "cnt = %"PRIu64, conf->local_subvols[i]->name, - size_files); - } - - gf_msg (this->name, GF_LOG_INFO, 0, 0, - "Total size files = %"PRIu64, total_size); - - return total_size; -} - - -static void* -dht_file_counter_thread (void *args) +gf_defrag_subvol_file_size(xlator_t *this, loc_t *root_loc) { - gf_defrag_info_t *defrag = NULL; - loc_t root_loc = {0,}; - struct timespec time_to_wait = {0,}; - struct timeval now = {0,}; - uint64_t tmp_size = 0; + int ret = -1; + struct statvfs buf = { + 0, + }; + if (!this) + return 0; - if (!args) - return NULL; + ret = syncop_statfs(this, root_loc, &buf, NULL, NULL); + if (ret) { + /* Aargh! 
*/ + return 0; + } + return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize); +} - defrag = (gf_defrag_info_t *) args; - dht_build_root_loc (defrag->root_inode, &root_loc); +uint64_t +gf_defrag_total_file_size(xlator_t *this, loc_t *root_loc) +{ + dht_conf_t *conf = NULL; + int i = 0; + uint64_t size_files = 0; + uint64_t total_size = 0; - while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { + conf = this->private; + if (!conf) { + return 0; + } + + for (i = 0; i < conf->local_subvols_cnt; i++) { + size_files = gf_defrag_subvol_file_size(conf->local_subvols[i], + root_loc); + total_size += size_files; + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "local subvol: %s," + "cnt = %" PRIu64, + conf->local_subvols[i]->name, size_files); + } + + gf_msg(this->name, GF_LOG_INFO, 0, 0, "Total size files = %" PRIu64, + total_size); + + return total_size; +} - gettimeofday (&now, NULL); - time_to_wait.tv_sec = now.tv_sec + 600; - time_to_wait.tv_nsec = 0; +static void * +dht_file_counter_thread(void *args) +{ + gf_defrag_info_t *defrag = NULL; + loc_t root_loc = { + 0, + }; + struct timespec time_to_wait = { + 0, + }; + struct timeval now = { + 0, + }; + uint64_t tmp_size = 0; + + if (!args) + return NULL; + defrag = (gf_defrag_info_t *)args; + dht_build_root_loc(defrag->root_inode, &root_loc); - pthread_mutex_lock (&defrag->fc_mutex); - pthread_cond_timedwait (&defrag->fc_wakeup_cond, - &defrag->fc_mutex, - &time_to_wait); + while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { + gettimeofday(&now, NULL); + time_to_wait.tv_sec = now.tv_sec + 600; + time_to_wait.tv_nsec = 0; - pthread_mutex_unlock (&defrag->fc_mutex); + pthread_mutex_lock(&defrag->fc_mutex); + pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex, + &time_to_wait); + pthread_mutex_unlock(&defrag->fc_mutex); - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) - break; + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) + break; - tmp_size = gf_defrag_total_file_size (defrag->this, - &root_loc); + tmp_size = gf_defrag_total_file_size(defrag->this, &root_loc); - gf_log ("dht", GF_LOG_INFO, - "tmp data size =%"PRIu64, - tmp_size); + gf_log("dht", GF_LOG_INFO, "tmp data size =%" PRIu64, tmp_size); - if (!tmp_size) { - gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get " - "the total data size. Unable to estimate " - "time to complete rebalance."); - } else { - g_totalsize = tmp_size; - gf_msg_debug ("dht", 0, - "total data size =%"PRIu64, - g_totalsize); - } + if (!tmp_size) { + gf_msg("dht", GF_LOG_ERROR, 0, 0, + "Failed to get " + "the total data size. Unable to estimate " + "time to complete rebalance."); + } else { + g_totalsize = tmp_size; + gf_msg_debug("dht", 0, "total data size =%" PRIu64, g_totalsize); } + } - return NULL; + return NULL; } int -gf_defrag_estimates_cleanup (xlator_t *this, gf_defrag_info_t *defrag, - pthread_t filecnt_thread) +gf_defrag_estimates_cleanup(xlator_t *this, gf_defrag_info_t *defrag, + pthread_t filecnt_thread) { - int ret = -1; - - /* Wake up the filecounter thread. - * By now the defrag status will no longer be - * GF_DEFRAG_STATUS_STARTED so the thread will exit the loop. - */ - pthread_mutex_lock (&defrag->fc_mutex); - { - pthread_cond_broadcast (&defrag->fc_wakeup_cond); - } - pthread_mutex_unlock (&defrag->fc_mutex); - - ret = pthread_join (filecnt_thread, NULL); - if (ret) { - gf_msg ("dht", GF_LOG_ERROR, ret, 0, - "file_counter_thread: pthread_join failed."); - ret = -1; - } - return ret; + int ret = -1; + + /* Wake up the filecounter thread. 
+ * By now the defrag status will no longer be + * GF_DEFRAG_STATUS_STARTED so the thread will exit the loop. + */ + pthread_mutex_lock(&defrag->fc_mutex); + { + pthread_cond_broadcast(&defrag->fc_wakeup_cond); + } + pthread_mutex_unlock(&defrag->fc_mutex); + + ret = pthread_join(filecnt_thread, NULL); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, ret, 0, + "file_counter_thread: pthread_join failed."); + ret = -1; + } + return ret; } - int -gf_defrag_estimates_init (xlator_t *this, loc_t *loc, - pthread_t *filecnt_thread) +gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread) { - int ret = -1; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - - conf = this->private; - defrag = conf->defrag; - - g_totalsize = gf_defrag_total_file_size (this, loc); - if (!g_totalsize) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get " - "the total data size. Unable to estimate " - "time to complete rebalance."); - goto out; - } - - ret = gf_thread_create (filecnt_thread, NULL, - &dht_file_counter_thread, - (void *)defrag, "dhtfcnt"); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to " - "create the file counter thread "); - ret = -1; - goto out; - } - ret = 0; + int ret = -1; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + + conf = this->private; + defrag = conf->defrag; + + g_totalsize = gf_defrag_total_file_size(this, loc); + if (!g_totalsize) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Failed to get " + "the total data size. Unable to estimate " + "time to complete rebalance."); + goto out; + } + + ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread, + (void *)defrag, "dhtfcnt"); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ret, 0, + "Failed to " + "create the file counter thread "); + ret = -1; + goto out; + } + ret = 0; out: - return ret; + return ret; } - /* Init and cleanup functions for parallel file migration*/ int -gf_defrag_parallel_migration_init (xlator_t *this, gf_defrag_info_t *defrag, - pthread_t **tid_array, int *thread_index) +gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag, + pthread_t **tid_array, int *thread_index) { - int ret = -1; - int thread_spawn_count = 0; - int index = 0; - pthread_t *tid = NULL; - char thread_name[GF_THREAD_NAMEMAX] = {0,}; - - if (!defrag) - goto out; - - /* Initialize global entry queue */ - defrag->queue = GF_CALLOC (1, sizeof (struct dht_container), - gf_dht_mt_container_t); - - if (!defrag->queue) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, 0, - "Failed to initialise migration queue"); - ret = -1; - goto out; - } + int ret = -1; + int thread_spawn_count = 0; + int index = 0; + pthread_t *tid = NULL; + char thread_name[GF_THREAD_NAMEMAX] = { + 0, + }; + + if (!defrag) + goto out; + + /* Initialize global entry queue */ + defrag->queue = GF_CALLOC(1, sizeof(struct dht_container), + gf_dht_mt_container_t); + + if (!defrag->queue) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "Failed to initialise migration queue"); + ret = -1; + goto out; + } - INIT_LIST_HEAD (&(defrag->queue[0].list)); + INIT_LIST_HEAD(&(defrag->queue[0].list)); - thread_spawn_count = MAX (MAX_REBAL_THREADS, 4); + thread_spawn_count = MAX(MAX_REBAL_THREADS, 4); - gf_msg_debug (this->name, 0, "thread_spawn_count: %d", - thread_spawn_count); + gf_msg_debug(this->name, 0, "thread_spawn_count: %d", thread_spawn_count); - tid = GF_CALLOC (thread_spawn_count, sizeof (pthread_t), - gf_common_mt_pthread_t); - if (!tid) { - gf_msg (this->name, GF_LOG_ERROR, 
ENOMEM, 0, - "Failed to create migration threads"); - ret = -1; - goto out; - } - defrag->current_thread_count = thread_spawn_count; - - /*Spawn Threads Here*/ - while (index < thread_spawn_count) { - snprintf (thread_name, sizeof(thread_name), - "dhtmig%d", ((index + 1) & 0x3ff)); - ret = gf_thread_create (&(tid[index]), NULL, - &gf_defrag_task, (void *)defrag, - thread_name); - if (ret != 0) { - gf_msg ("DHT", GF_LOG_ERROR, ret, 0, - "Thread[%d] creation failed. ", - index); - ret = -1; - goto out; - } else { - gf_log ("DHT", GF_LOG_INFO, "Thread[%d] " - "creation successful", index); - } - index++; + tid = GF_CALLOC(thread_spawn_count, sizeof(pthread_t), + gf_common_mt_pthread_t); + if (!tid) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "Failed to create migration threads"); + ret = -1; + goto out; + } + defrag->current_thread_count = thread_spawn_count; + + /*Spawn Threads Here*/ + while (index < thread_spawn_count) { + snprintf(thread_name, sizeof(thread_name), "dhtmig%d", + ((index + 1) & 0x3ff)); + ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task, + (void *)defrag, thread_name); + if (ret != 0) { + gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ", + index); + ret = -1; + goto out; + } else { + gf_log("DHT", GF_LOG_INFO, + "Thread[%d] " + "creation successful", + index); } + index++; + } - ret = 0; + ret = 0; out: - *thread_index = index; - *tid_array = tid; + *thread_index = index; + *tid_array = tid; - return ret; + return ret; } int -gf_defrag_parallel_migration_cleanup (gf_defrag_info_t *defrag, - pthread_t *tid_array, int thread_index) +gf_defrag_parallel_migration_cleanup(gf_defrag_info_t *defrag, + pthread_t *tid_array, int thread_index) { - int ret = -1; - int i = 0; + int ret = -1; + int i = 0; - if (!defrag) - goto out; + if (!defrag) + goto out; - /* Wake up all migration threads */ - pthread_mutex_lock (&defrag->dfq_mutex); - { - defrag->crawl_done = 1; + /* Wake up all migration threads */ + pthread_mutex_lock(&defrag->dfq_mutex); + { + defrag->crawl_done = 1; - pthread_cond_broadcast (&defrag->parallel_migration_cond); - pthread_cond_broadcast (&defrag->df_wakeup_thread); - } - pthread_mutex_unlock (&defrag->dfq_mutex); + pthread_cond_broadcast(&defrag->parallel_migration_cond); + pthread_cond_broadcast(&defrag->df_wakeup_thread); + } + pthread_mutex_unlock(&defrag->dfq_mutex); - /*Wait for all the threads to complete their task*/ - for (i = 0; i < thread_index; i++) { - pthread_join (tid_array[i], NULL); - } + /*Wait for all the threads to complete their task*/ + for (i = 0; i < thread_index; i++) { + pthread_join(tid_array[i], NULL); + } - GF_FREE (tid_array); + GF_FREE(tid_array); - /* Cleanup the migration queue */ - if (defrag->queue) { - gf_dirent_free (defrag->queue[0].df_entry); - INIT_LIST_HEAD (&(defrag->queue[0].list)); - } + /* Cleanup the migration queue */ + if (defrag->queue) { + gf_dirent_free(defrag->queue[0].df_entry); + INIT_LIST_HEAD(&(defrag->queue[0].list)); + } - GF_FREE (defrag->queue); + GF_FREE(defrag->queue); - ret = 0; + ret = 0; out: - return ret; + return ret; } - - int -gf_defrag_start_crawl (void *data) +gf_defrag_start_crawl(void *data) { - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - dict_t *fix_layout = NULL; - dict_t *migrate_data = NULL; - dict_t *status = NULL; - glusterfs_ctx_t *ctx = NULL; - dht_methods_t *methods = NULL; - call_frame_t *statfs_frame = NULL; - xlator_t *old_THIS = NULL; - int ret = -1; - loc_t loc = {0,}; - struct iatt iatt = {0,}; - 
struct iatt parent = {0,}; - int thread_index = 0; - pthread_t *tid = NULL; - pthread_t filecnt_thread; - gf_boolean_t is_tier_detach = _gf_false; - gf_boolean_t fc_thread_started = _gf_false; - - this = data; - if (!this) - goto exit; - - ctx = this->ctx; - if (!ctx) - goto exit; - - conf = this->private; - if (!conf) - goto exit; - - defrag = conf->defrag; - if (!defrag) - goto exit; - - gettimeofday (&defrag->start_time, NULL); - dht_build_root_inode (this, &defrag->root_inode); - if (!defrag->root_inode) - goto out; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + dict_t *fix_layout = NULL; + dict_t *migrate_data = NULL; + dict_t *status = NULL; + glusterfs_ctx_t *ctx = NULL; + dht_methods_t *methods = NULL; + call_frame_t *statfs_frame = NULL; + xlator_t *old_THIS = NULL; + int ret = -1; + loc_t loc = { + 0, + }; + struct iatt iatt = { + 0, + }; + struct iatt parent = { + 0, + }; + int thread_index = 0; + pthread_t *tid = NULL; + pthread_t filecnt_thread; + gf_boolean_t is_tier_detach = _gf_false; + gf_boolean_t fc_thread_started = _gf_false; + + this = data; + if (!this) + goto exit; + + ctx = this->ctx; + if (!ctx) + goto exit; + + conf = this->private; + if (!conf) + goto exit; + + defrag = conf->defrag; + if (!defrag) + goto exit; + + gettimeofday(&defrag->start_time, NULL); + dht_build_root_inode(this, &defrag->root_inode); + if (!defrag->root_inode) + goto out; + + dht_build_root_loc(defrag->root_inode, &loc); + + /* fix-layout on '/' first */ + + ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED, + "Failed to start rebalance: look up on / failed"); + ret = -1; + goto out; + } - dht_build_root_loc (defrag->root_inode, &loc); + old_THIS = THIS; + THIS = this; - /* fix-layout on '/' first */ + statfs_frame = create_frame(this, this->ctx->pool); + if (!statfs_frame) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "Insufficient memory. Frame creation failed"); + ret = -1; + goto out; + } - ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL); + /* async statfs update for honoring min-free-disk */ + dht_get_du_info(statfs_frame, this, &loc); + THIS = old_THIS; - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_REBALANCE_START_FAILED, - "Failed to start rebalance: look up on / failed"); - ret = -1; - goto out; - } + fix_layout = dict_new(); + if (!fix_layout) { + ret = -1; + goto out; + } + + /* + * Unfortunately, we can't do special xattrs (like fix.layout) and + * real ones in the same call currently, and changing it seems + * riskier than just doing two calls. + */ + + gf_log(this->name, GF_LOG_INFO, "%s using commit hash %u", __func__, + conf->vol_commit_hash); + + ret = dict_set_uint32(fix_layout, conf->commithash_xattr_name, + conf->vol_commit_hash); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to set %s", + conf->commithash_xattr_name); + defrag->total_failures++; + ret = -1; + goto out; + } + + ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set commit hash on %s. " + "Rebalance cannot proceed.", + loc.path); + defrag->total_failures++; + ret = -1; + goto out; + } + + /* We now return to our regularly scheduled program. 
*/ + + ret = dict_set_str(fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED, + "Failed to start rebalance:" + "Failed to set dictionary value: key = %s", + GF_XATTR_FIX_LAYOUT_KEY); + defrag->total_failures++; + ret = -1; + goto out; + } - old_THIS = THIS; - THIS = this; + defrag->new_commit_hash = conf->vol_commit_hash; - statfs_frame = create_frame (this, this->ctx->pool); - if (!statfs_frame) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "Insufficient memory. Frame creation failed"); - ret = -1; - goto out; - } + ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_FAILED, + "fix layout on %s failed", loc.path); + defrag->total_failures++; + ret = -1; + goto out; + } - /* async statfs update for honoring min-free-disk */ - dht_get_du_info (statfs_frame, this, &loc); - THIS = old_THIS; + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { + /* We need to migrate files */ - fix_layout = dict_new (); - if (!fix_layout) { - ret = -1; - goto out; + migrate_data = dict_new(); + if (!migrate_data) { + defrag->total_failures++; + ret = -1; + goto out; } - - /* - * Unfortunately, we can't do special xattrs (like fix.layout) and - * real ones in the same call currently, and changing it seems - * riskier than just doing two calls. - */ - - gf_log (this->name, GF_LOG_INFO, "%s using commit hash %u", - __func__, conf->vol_commit_hash); - - ret = dict_set_uint32 (fix_layout, conf->commithash_xattr_name, - conf->vol_commit_hash); + ret = dict_set_str( + migrate_data, GF_XATTR_FILE_MIGRATE_KEY, + (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) ? "force" : "non-force"); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set %s", conf->commithash_xattr_name); - defrag->total_failures++; - ret = -1; - goto out; + defrag->total_failures++; + ret = -1; + goto out; } - ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL); + ret = dht_init_local_subvols_and_nodeuuids(this, conf, &loc); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set commit hash on %s. " - "Rebalance cannot proceed.", - loc.path); - defrag->total_failures++; - ret = -1; - goto out; + ret = -1; + goto out; } - /* We now return to our regularly scheduled program. 
*/ - - ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes"); + /* Initialise the structures required for parallel migration */ + ret = gf_defrag_parallel_migration_init(this, defrag, &tid, + &thread_index); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_REBALANCE_START_FAILED, - "Failed to start rebalance:" - "Failed to set dictionary value: key = %s", - GF_XATTR_FIX_LAYOUT_KEY); - defrag->total_failures++; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Aborting rebalance."); + goto out; } - defrag->new_commit_hash = conf->vol_commit_hash; - - ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL); + ret = gf_defrag_estimates_init(this, &loc, &filecnt_thread); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_REBALANCE_FAILED, - "fix layout on %s failed", - loc.path); - defrag->total_failures++; - ret = -1; - goto out; - } - - if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { - /* We need to migrate files */ - - migrate_data = dict_new (); - if (!migrate_data) { - defrag->total_failures++; - ret = -1; - goto out; - } - ret = dict_set_str (migrate_data, GF_XATTR_FILE_MIGRATE_KEY, - (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) - ? "force" : "non-force"); - if (ret) { - defrag->total_failures++; - ret = -1; - goto out; - } - - ret = dht_init_local_subvols_and_nodeuuids (this, conf, &loc); - if (ret) { - ret = -1; - goto out; - } - - /* Initialise the structures required for parallel migration */ - ret = gf_defrag_parallel_migration_init (this, defrag, &tid, - &thread_index); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, - "Aborting rebalance."); - goto out; - } - - ret = gf_defrag_estimates_init (this, &loc, &filecnt_thread); - if (ret) { - /* Not a fatal error. Allow the rebalance to proceed*/ - ret = 0; - } else { - fc_thread_started = _gf_true; - } - } - - - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Fix layout for attach tier */ - ret = gf_tier_start_fix_layout (this, &loc, defrag, fix_layout); - if (ret) { - goto out; - } - - methods = &(conf->methods); - - /* Calling tier_start of tier.c */ - methods->migration_other(this, defrag); - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - - ret = dict_set_str (migrate_data, - GF_XATTR_FILE_MIGRATE_KEY, - "force"); - if (ret) - goto out; - - } + /* Not a fatal error. Allow the rebalance to proceed*/ + ret = 0; } else { - ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout, - migrate_data); - if (ret && ret != 2) { - defrag->total_failures++; - ret = -1; - goto out; - } - - if (ret != 2 && gf_defrag_settle_hash - (this, defrag, &loc, fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } - - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) - is_tier_detach = _gf_true; - + fc_thread_started = _gf_true; } + } - gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed"); -out: - - /* We are here means crawling the entire file system is done - or something failed. 
Set defrag->crawl_done flag to intimate - the migrator threads to exhaust the defrag->queue and terminate*/ - + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + /* Fix layout for attach tier */ + ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout); if (ret) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - } - - - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Wait for the tier fixlayout to - * complete if its was started.*/ - gf_tier_wait_fix_lookup (defrag); + goto out; } - if (is_tier_detach && ret == 0) { - /* If it was a detach remove the tier fix-layout - * xattr on root. Ignoring the failure, as nothing has to be - * done, logging is done in gf_tier_clear_fix_layout */ - gf_tier_clear_fix_layout (this, &loc, defrag); - } - - gf_defrag_parallel_migration_cleanup (defrag, tid, thread_index); + methods = &(conf->methods); - if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) && - (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) { - defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + /* Calling tier_start of tier.c */ + methods->migration_other(this, defrag); + if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || + defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { + ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, + "force"); + if (ret) + goto out; } - - if (fc_thread_started) { - gf_defrag_estimates_cleanup (this, defrag, filecnt_thread); + } else { + ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, + migrate_data); + if (ret && ret != 2) { + defrag->total_failures++; + ret = -1; + goto out; } - dht_send_rebalance_event (this, defrag->cmd, defrag->defrag_status); - - LOCK (&defrag->lock); - { - status = dict_new (); - gf_defrag_status_get (conf, status); - if (ctx && ctx->notify) - ctx->notify (GF_EN_DEFRAG_STATUS, status); - if (status) - dict_unref (status); - defrag->is_exiting = 1; + if (ret != 2 && + gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { + defrag->total_failures++; + ret = -1; + goto out; } - UNLOCK (&defrag->lock); + if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || + defrag->cmd == GF_DEFRAG_CMD_DETACH_START) + is_tier_detach = _gf_true; + } - GF_FREE (defrag); - conf->defrag = NULL; - - if (migrate_data) - dict_unref (migrate_data); + gf_log("DHT", GF_LOG_INFO, "crawling file-system completed"); +out: - if (statfs_frame) { - STACK_DESTROY (statfs_frame->root); - } + /* We are here means crawling the entire file system is done + or something failed. Set defrag->crawl_done flag to intimate + the migrator threads to exhaust the defrag->queue and terminate*/ + + if (ret) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + } + + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + /* Wait for the tier fixlayout to + * complete if its was started.*/ + gf_tier_wait_fix_lookup(defrag); + } + + if (is_tier_detach && ret == 0) { + /* If it was a detach remove the tier fix-layout + * xattr on root. 
Ignoring the failure, as nothing has to be + * done, logging is done in gf_tier_clear_fix_layout */ + gf_tier_clear_fix_layout(this, &loc, defrag); + } + + gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index); + + if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) && + (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) { + defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + } + + if (fc_thread_started) { + gf_defrag_estimates_cleanup(this, defrag, filecnt_thread); + } + + dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status); + + LOCK(&defrag->lock); + { + status = dict_new(); + gf_defrag_status_get(conf, status); + if (ctx && ctx->notify) + ctx->notify(GF_EN_DEFRAG_STATUS, status); + if (status) + dict_unref(status); + defrag->is_exiting = 1; + } + UNLOCK(&defrag->lock); + + GF_FREE(defrag); + conf->defrag = NULL; + + if (migrate_data) + dict_unref(migrate_data); + + if (statfs_frame) { + STACK_DESTROY(statfs_frame->root); + } exit: - return ret; + return ret; } - - static int -gf_defrag_done (int ret, call_frame_t *sync_frame, void *data) +gf_defrag_done(int ret, call_frame_t *sync_frame, void *data) { - gf_listener_stop (sync_frame->this); + gf_listener_stop(sync_frame->this); - STACK_DESTROY (sync_frame->root); - kill (getpid(), SIGTERM); - return 0; + STACK_DESTROY(sync_frame->root); + kill(getpid(), SIGTERM); + return 0; } void * -gf_defrag_start (void *data) +gf_defrag_start(void *data) { - int ret = -1; - call_frame_t *frame = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - xlator_t *this = NULL; - xlator_t *old_THIS = NULL; - - this = data; - conf = this->private; - if (!conf) - goto out; + int ret = -1; + call_frame_t *frame = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + xlator_t *this = NULL; + xlator_t *old_THIS = NULL; - defrag = conf->defrag; - if (!defrag) - goto out; + this = data; + conf = this->private; + if (!conf) + goto out; - frame = create_frame (this, this->ctx->pool); - if (!frame) - goto out; + defrag = conf->defrag; + if (!defrag) + goto out; - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame = create_frame(this, this->ctx->pool); + if (!frame) + goto out; - defrag->pid = frame->root->pid; + frame->root->pid = GF_CLIENT_PID_DEFRAG; - defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; + defrag->pid = frame->root->pid; - old_THIS = THIS; - THIS = this; - ret = synctask_new (this->ctx->env, gf_defrag_start_crawl, - gf_defrag_done, frame, this); + defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_REBALANCE_START_FAILED, - "Could not create task for rebalance"); - THIS = old_THIS; + old_THIS = THIS; + THIS = this; + ret = synctask_new(this->ctx->env, gf_defrag_start_crawl, gf_defrag_done, + frame, this); + + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED, + "Could not create task for rebalance"); + THIS = old_THIS; out: - return NULL; + return NULL; } - uint64_t -gf_defrag_get_estimates_based_on_size (dht_conf_t *conf) +gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) { - gf_defrag_info_t *defrag = NULL; - double rate_processed = 0; - uint64_t total_processed = 0; - uint64_t tmp_count = 0; - uint64_t time_to_complete = 0; - struct timeval now = {0,}; - double elapsed = 0; + gf_defrag_info_t *defrag = NULL; + double rate_processed = 0; + uint64_t total_processed = 0; + uint64_t tmp_count = 0; + uint64_t time_to_complete = 0; + struct timeval now = { + 0, + }; + double elapsed = 0; - defrag = 
conf->defrag; + defrag = conf->defrag; - if (!g_totalsize) - goto out; + if (!g_totalsize) + goto out; - gettimeofday (&now, NULL); - elapsed = now.tv_sec - defrag->start_time.tv_sec; + gettimeofday(&now, NULL); + elapsed = now.tv_sec - defrag->start_time.tv_sec; - /* Don't calculate the estimates for the first 10 minutes. - * It is unlikely to be accurate and estimates are not required - * if the process finishes in less than 10 mins. - */ + /* Don't calculate the estimates for the first 10 minutes. + * It is unlikely to be accurate and estimates are not required + * if the process finishes in less than 10 mins. + */ - if (elapsed < ESTIMATE_START_INTERVAL) { - gf_msg (THIS->name, GF_LOG_INFO, 0, 0, - "Rebalance estimates will not be available for the " - "first %d seconds.", ESTIMATE_START_INTERVAL); + if (elapsed < ESTIMATE_START_INTERVAL) { + gf_msg(THIS->name, GF_LOG_INFO, 0, 0, + "Rebalance estimates will not be available for the " + "first %d seconds.", + ESTIMATE_START_INTERVAL); - goto out; - } + goto out; + } - total_processed = defrag->size_processed; + total_processed = defrag->size_processed; - /* rate at which files processed */ - rate_processed = (total_processed)/elapsed; + /* rate at which files processed */ + rate_processed = (total_processed) / elapsed; - tmp_count = g_totalsize; + tmp_count = g_totalsize; - if (rate_processed) { - time_to_complete = (tmp_count)/rate_processed; + if (rate_processed) { + time_to_complete = (tmp_count) / rate_processed; - } else { - gf_msg (THIS->name, GF_LOG_ERROR, 0, 0, - "Unable to calculate estimated time for rebalance"); - } + } else { + gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, + "Unable to calculate estimated time for rebalance"); + } - gf_log (THIS->name, GF_LOG_INFO, - "TIME: (size) total_processed=%"PRIu64" tmp_cnt = %"PRIu64"," - "rate_processed=%f, elapsed = %f", total_processed, tmp_count, - rate_processed, elapsed); + gf_log(THIS->name, GF_LOG_INFO, + "TIME: (size) total_processed=%" PRIu64 " tmp_cnt = %" PRIu64 + "," + "rate_processed=%f, elapsed = %f", + total_processed, tmp_count, rate_processed, elapsed); out: - return time_to_complete; + return time_to_complete; } - int -gf_defrag_status_get (dht_conf_t *conf, dict_t *dict) +gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) { - int ret = 0; - uint64_t files = 0; - uint64_t size = 0; - uint64_t lookup = 0; - uint64_t failures = 0; - uint64_t skipped = 0; - uint64_t promoted = 0; - uint64_t demoted = 0; - char *status = ""; - double elapsed = 0; - struct timeval end = {0,}; - uint64_t time_to_complete = 0; - uint64_t time_left = 0; - gf_defrag_info_t *defrag = conf->defrag; - - if (!defrag) - goto out; - - ret = 0; - if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) - goto out; - - files = defrag->total_files; - size = defrag->total_data; - lookup = defrag->num_files_lookedup; - failures = defrag->total_failures; - skipped = defrag->skipped; - promoted = defrag->total_files_promoted; - demoted = defrag->total_files_demoted; - - gettimeofday (&end, NULL); - - elapsed = end.tv_sec - defrag->start_time.tv_sec; - - - /* The rebalance is still in progress */ - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) - && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { - - time_to_complete = gf_defrag_get_estimates_based_on_size (conf); - - if (time_to_complete && (time_to_complete > elapsed)) - time_left = time_to_complete - elapsed; - - gf_log (THIS->name, GF_LOG_INFO, - "TIME: Estimated total time to complete (size)= %"PRIu64 - " seconds, seconds left = %"PRIu64"", - 
time_to_complete, time_left); - } - - if (!dict) - goto log; - - ret = dict_set_uint64 (dict, "promoted", promoted); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set promoted count"); - - ret = dict_set_uint64 (dict, "demoted", demoted); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set demoted count"); - - ret = dict_set_uint64 (dict, "files", files); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set file count"); - - ret = dict_set_uint64 (dict, "size", size); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set size of xfer"); - - ret = dict_set_uint64 (dict, "lookups", lookup); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set lookedup file count"); - - - ret = dict_set_int32 (dict, "status", defrag->defrag_status); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set status"); - - ret = dict_set_double (dict, "run-time", elapsed); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set run-time"); - - ret = dict_set_uint64 (dict, "failures", failures); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set failure count"); - - ret = dict_set_uint64 (dict, "skipped", skipped); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set skipped file count"); - - ret = dict_set_uint64 (dict, "time-left", time_left); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set time-left"); + int ret = 0; + uint64_t files = 0; + uint64_t size = 0; + uint64_t lookup = 0; + uint64_t failures = 0; + uint64_t skipped = 0; + uint64_t promoted = 0; + uint64_t demoted = 0; + char *status = ""; + double elapsed = 0; + struct timeval end = { + 0, + }; + uint64_t time_to_complete = 0; + uint64_t time_left = 0; + gf_defrag_info_t *defrag = conf->defrag; + + if (!defrag) + goto out; + + ret = 0; + if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) + goto out; + + files = defrag->total_files; + size = defrag->total_data; + lookup = defrag->num_files_lookedup; + failures = defrag->total_failures; + skipped = defrag->skipped; + promoted = defrag->total_files_promoted; + demoted = defrag->total_files_demoted; + + gettimeofday(&end, NULL); + + elapsed = end.tv_sec - defrag->start_time.tv_sec; + + /* The rebalance is still in progress */ + + if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && + (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { + time_to_complete = gf_defrag_get_estimates_based_on_size(conf); + + if (time_to_complete && (time_to_complete > elapsed)) + time_left = time_to_complete - elapsed; + + gf_log(THIS->name, GF_LOG_INFO, + "TIME: Estimated total time to complete (size)= %" PRIu64 + " seconds, seconds left = %" PRIu64 "", + time_to_complete, time_left); + } + + if (!dict) + goto log; + + ret = dict_set_uint64(dict, "promoted", promoted); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count"); + + ret = dict_set_uint64(dict, "demoted", demoted); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count"); + + ret = dict_set_uint64(dict, "files", files); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count"); + + ret = dict_set_uint64(dict, "size", size); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set size of xfer"); + + ret = dict_set_uint64(dict, "lookups", lookup); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set lookedup file count"); + + ret = dict_set_int32(dict, "status", defrag->defrag_status); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set 
status"); + + ret = dict_set_double(dict, "run-time", elapsed); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set run-time"); + + ret = dict_set_uint64(dict, "failures", failures); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set failure count"); + + ret = dict_set_uint64(dict, "skipped", skipped); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set skipped file count"); + + ret = dict_set_uint64(dict, "time-left", time_left); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left"); log: - switch (defrag->defrag_status) { + switch (defrag->defrag_status) { case GF_DEFRAG_STATUS_NOT_STARTED: - status = "not started"; - break; + status = "not started"; + break; case GF_DEFRAG_STATUS_STARTED: - status = "in progress"; - break; + status = "in progress"; + break; case GF_DEFRAG_STATUS_STOPPED: - status = "stopped"; - break; + status = "stopped"; + break; case GF_DEFRAG_STATUS_COMPLETE: - status = "completed"; - break; + status = "completed"; + break; case GF_DEFRAG_STATUS_FAILED: - status = "failed"; - break; + status = "failed"; + break; default: - break; - } - - gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, - "Rebalance is %s. Time taken is %.2f secs", - status, elapsed); - gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, - "Files migrated: %"PRIu64", size: %" - PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: " - "%"PRIu64, files, size, lookup, failures, skipped); + break; + } + + gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, + "Rebalance is %s. Time taken is %.2f secs", status, elapsed); + gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, + "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64 + ", failures: %" PRIu64 + ", skipped: " + "%" PRIu64, + files, size, lookup, failures, skipped); out: - return 0; + return 0; } void -gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state) +gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state) { - pthread_mutex_lock (&tier_conf->pause_mutex); - tier_conf->pause_state = state; - pthread_mutex_unlock (&tier_conf->pause_mutex); + pthread_mutex_lock(&tier_conf->pause_mutex); + tier_conf->pause_state = state; + pthread_mutex_unlock(&tier_conf->pause_mutex); } - tier_pause_state_t -gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf) +gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf) { - int state; + int state; - pthread_mutex_lock (&tier_conf->pause_mutex); - state = tier_conf->pause_state; - pthread_mutex_unlock (&tier_conf->pause_mutex); + pthread_mutex_lock(&tier_conf->pause_mutex); + state = tier_conf->pause_state; + pthread_mutex_unlock(&tier_conf->pause_mutex); - return state; + return state; } tier_pause_state_t -gf_defrag_check_pause_tier (gf_tier_conf_t *tier_conf) +gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf) { - int woke = 0; - int state = -1; + int woke = 0; + int state = -1; - pthread_mutex_lock (&tier_conf->pause_mutex); + pthread_mutex_lock(&tier_conf->pause_mutex); - if (tier_conf->pause_state == TIER_RUNNING) - goto out; + if (tier_conf->pause_state == TIER_RUNNING) + goto out; - if (tier_conf->pause_state == TIER_PAUSED) - goto out; + if (tier_conf->pause_state == TIER_PAUSED) + goto out; - if (tier_conf->promote_in_progress || - tier_conf->demote_in_progress) - goto out; + if (tier_conf->promote_in_progress || tier_conf->demote_in_progress) + goto out; - tier_conf->pause_state = TIER_PAUSED; + tier_conf->pause_state = TIER_PAUSED; - if 
(tier_conf->pause_synctask) { - synctask_wake (tier_conf->pause_synctask); - tier_conf->pause_synctask = 0; - woke = 1; - } + if (tier_conf->pause_synctask) { + synctask_wake(tier_conf->pause_synctask); + tier_conf->pause_synctask = 0; + woke = 1; + } - gf_msg ("tier", GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "woken %d", woke); + gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke); - gf_event (EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname); + gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname); out: - state = tier_conf->pause_state; + state = tier_conf->pause_state; - pthread_mutex_unlock (&tier_conf->pause_mutex); + pthread_mutex_unlock(&tier_conf->pause_mutex); - return state; + return state; } void -gf_defrag_pause_tier_timeout (void *data) +gf_defrag_pause_tier_timeout(void *data) { - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; - this = (xlator_t *) data; - GF_VALIDATE_OR_GOTO ("tier", this, out); + this = (xlator_t *)data; + GF_VALIDATE_OR_GOTO("tier", this, out); - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO (this->name, defrag, out); + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO(this->name, defrag, out); - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "Request pause timer timeout"); + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, + "Request pause timer timeout"); - gf_defrag_check_pause_tier (&defrag->tier_conf); + gf_defrag_check_pause_tier(&defrag->tier_conf); out: - return; + return; } int -gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag) +gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag) { - int ret = 0; - struct timespec delta = {0,}; - int delay = 2; - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) - goto out; - - /* - * Set flag requesting to pause tiering. Wait 'delay' seconds for - * tiering to actually stop as indicated by the pause state - * before returning success or failure. - */ - gf_defrag_set_pause_state (&defrag->tier_conf, TIER_REQUEST_PAUSE); - - /* - * If migration is not underway, can pause immediately. - */ - gf_defrag_check_pause_tier (&defrag->tier_conf); - if (gf_defrag_get_pause_state (&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "Request pause tier"); - - defrag->tier_conf.pause_synctask = synctask_get (); - delta.tv_sec = delay; - delta.tv_nsec = 0; - defrag->tier_conf.pause_timer = - gf_timer_call_after (this->ctx, delta, - gf_defrag_pause_tier_timeout, - this); - - synctask_yield (defrag->tier_conf.pause_synctask); - - if (gf_defrag_get_pause_state (&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_defrag_set_pause_state (&defrag->tier_conf, TIER_RUNNING); - - ret = -1; + int ret = 0; + struct timespec delta = { + 0, + }; + int delay = 2; + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) + goto out; + + /* + * Set flag requesting to pause tiering. Wait 'delay' seconds for + * tiering to actually stop as indicated by the pause state + * before returning success or failure. + */ + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); + + /* + * If migration is not underway, can pause immediately. 
+ */ + gf_defrag_check_pause_tier(&defrag->tier_conf); + if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) + goto out; + + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, + "Request pause tier"); + + defrag->tier_conf.pause_synctask = synctask_get(); + delta.tv_sec = delay; + delta.tv_nsec = 0; + defrag->tier_conf.pause_timer = gf_timer_call_after( + this->ctx, delta, gf_defrag_pause_tier_timeout, this); + + synctask_yield(defrag->tier_conf.pause_synctask); + + if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) + goto out; + + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); + + ret = -1; out: - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "Pause tiering ret=%d", ret); + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, + "Pause tiering ret=%d", ret); - return ret; + return ret; } int -gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag) +gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag) { - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_RESUME, - "Pause end. Resume tiering"); + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME, + "Pause end. Resume tiering"); - gf_defrag_set_pause_state (&defrag->tier_conf, TIER_RUNNING); + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - gf_event (EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname); + gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname); - return 0; + return 0; } int -gf_defrag_start_detach_tier (gf_defrag_info_t *defrag) +gf_defrag_start_detach_tier(gf_defrag_info_t *defrag) { - defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; + defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; - return 0; + return 0; } int -gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status, - dict_t *output) +gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) { - /* TODO: set a variable 'stop_defrag' here, it should be checked - in defrag loop */ - int ret = -1; - gf_defrag_info_t *defrag = conf->defrag; + /* TODO: set a variable 'stop_defrag' here, it should be checked + in defrag loop */ + int ret = -1; + gf_defrag_info_t *defrag = conf->defrag; - GF_ASSERT (defrag); + GF_ASSERT(defrag); - if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) { - goto out; - } + if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) { + goto out; + } - gf_msg ("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED, - "Received stop command on rebalance"); - defrag->defrag_status = status; + gf_msg("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED, + "Received stop command on rebalance"); + defrag->defrag_status = status; - if (output) - gf_defrag_status_get (conf, output); - ret = 0; + if (output) + gf_defrag_status_get(conf, output); + ret = 0; out: - gf_msg_debug ("", 0, "Returning %d", ret); - return ret; + gf_msg_debug("", 0, "Returning %d", ret); + return ret; } diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index 295d95232ec..45808a2bfa6 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -17,483 +17,450 @@ #include "dht-lock.h" #include "defaults.h" -int dht_rename_unlock (call_frame_t *frame, xlator_t *this); +int +dht_rename_unlock(call_frame_t *frame, xlator_t *this); int32_t -dht_rename_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata); +dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata); int 
-dht_rename_unlock_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) +dht_rename_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - dht_set_fixed_dir_stat (&local->preoldparent); - dht_set_fixed_dir_stat (&local->postoldparent); - dht_set_fixed_dir_stat (&local->preparent); - dht_set_fixed_dir_stat (&local->postparent); + dht_set_fixed_dir_stat(&local->preoldparent); + dht_set_fixed_dir_stat(&local->postoldparent); + dht_set_fixed_dir_stat(&local->preparent); + dht_set_fixed_dir_stat(&local->postparent); - if (IA_ISREG (local->stbuf.ia_type)) - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + if (IA_ISREG(local->stbuf.ia_type)) + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, &local->preparent, - &local->postparent, local->xattr); - return 0; + DHT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, + &local->stbuf, &local->preoldparent, &local->postoldparent, + &local->preparent, &local->postparent, local->xattr); + return 0; } static void -dht_rename_dir_unlock_src (call_frame_t *frame, xlator_t *this) +dht_rename_dir_unlock_src(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; - dht_unlock_namespace (frame, &local->lock[0]); - return; + local = frame->local; + dht_unlock_namespace(frame, &local->lock[0]); + return; } static void -dht_rename_dir_unlock_dst (call_frame_t *frame, xlator_t *this) +dht_rename_dir_unlock_dst(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - int op_ret = -1; - char src_gfid[GF_UUID_BUF_SIZE] = {0}; - char dst_gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - - /* Unlock entrylk */ - dht_unlock_entrylk_wrapper (frame, &local->lock[1].ns.directory_ns); - - /* Unlock inodelk */ - op_ret = dht_unlock_inodelk (frame, - local->lock[1].ns.parent_layout.locks, - local->lock[1].ns.parent_layout.lk_count, - dht_rename_unlock_cbk); - if (op_ret < 0) { - uuid_utoa_r (local->loc.inode->gfid, src_gfid); - - if (local->loc2.inode) - uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); - - if (IA_ISREG (local->stbuf.ia_type)) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "winding unlock inodelk failed " - "rename (%s:%s:%s %s:%s:%s), " - "stale locks left on bricks", - local->loc.path, src_gfid, - local->src_cached->name, - local->loc2.path, dst_gfid, - local->dst_cached ? 
- local->dst_cached->name : NULL); - else - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "winding unlock inodelk failed " - "rename (%s:%s %s:%s), " - "stale locks left on bricks", - local->loc.path, src_gfid, - local->loc2.path, dst_gfid); - - dht_rename_unlock_cbk (frame, NULL, this, 0, 0, NULL); - } + dht_local_t *local = NULL; + int op_ret = -1; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + /* Unlock entrylk */ + dht_unlock_entrylk_wrapper(frame, &local->lock[1].ns.directory_ns); + + /* Unlock inodelk */ + op_ret = dht_unlock_inodelk(frame, local->lock[1].ns.parent_layout.locks, + local->lock[1].ns.parent_layout.lk_count, + dht_rename_unlock_cbk); + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + if (IA_ISREG(local->stbuf.ia_type)) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s:%s %s:%s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + else + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s %s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->loc2.path, dst_gfid); - return; + dht_rename_unlock_cbk(frame, NULL, this, 0, 0, NULL); + } + + return; } static int -dht_rename_dir_unlock (call_frame_t *frame, xlator_t *this) +dht_rename_dir_unlock(call_frame_t *frame, xlator_t *this) { - - dht_rename_dir_unlock_src (frame, this); - dht_rename_dir_unlock_dst (frame, this); - return 0; + dht_rename_dir_unlock_src(frame, this); + dht_rename_dir_unlock_dst(frame, this); + return 0; } int -dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +dht_rename_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - int i = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - int subvol_cnt = -1; - - conf = this->private; - local = frame->local; - prev = cookie; - subvol_cnt = dht_subvol_cnt (this, prev); - local->ret_cache[subvol_cnt] = op_ret; - - if (op_ret == -1) { - gf_uuid_unparse(local->loc.inode->gfid, gfid); - - gf_msg (this->name, GF_LOG_INFO, op_errno, - DHT_MSG_RENAME_FAILED, - "Rename %s -> %s on %s failed, (gfid = %s)", - local->loc.path, local->loc2.path, - prev->name, gfid); - - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } - /* TODO: construct proper stbuf for dir */ - /* - * FIXME: is this the correct way to build stbuf and - * parent bufs? 
- */ - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->preoldparent, preoldparent); - dht_iatt_merge (this, &local->postoldparent, postoldparent); - dht_iatt_merge (this, &local->preparent, prenewparent); - dht_iatt_merge (this, &local->postparent, postnewparent); + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + int i = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int subvol_cnt = -1; + + conf = this->private; + local = frame->local; + prev = cookie; + subvol_cnt = dht_subvol_cnt(this, prev); + local->ret_cache[subvol_cnt] = op_ret; + + if (op_ret == -1) { + gf_uuid_unparse(local->loc.inode->gfid, gfid); + + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED, + "Rename %s -> %s on %s failed, (gfid = %s)", local->loc.path, + local->loc2.path, prev->name, gfid); + + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + /* TODO: construct proper stbuf for dir */ + /* + * FIXME: is this the correct way to build stbuf and + * parent bufs? + */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preoldparent, preoldparent); + dht_iatt_merge(this, &local->postoldparent, postoldparent); + dht_iatt_merge(this, &local->preparent, prenewparent); + dht_iatt_merge(this, &local->postparent, postnewparent); unwind: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - /* We get here with local->call_cnt == 0. Which means - * we are the only one executing this code, there is - * no contention. Therefore it's safe to manipulate or - * deref local->call_cnt directly (without locking). + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + /* We get here with local->call_cnt == 0. Which means + * we are the only one executing this code, there is + * no contention. Therefore it's safe to manipulate or + * deref local->call_cnt directly (without locking). 
+ */ + if (local->ret_cache[conf->subvolume_cnt] == 0) { + /* count errant subvols in last field of ret_cache */ + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i] != 0) + ++local->ret_cache[conf->subvolume_cnt]; + } + if (local->ret_cache[conf->subvolume_cnt]) { + /* undoing the damage: + * for all subvolumes, where rename + * succeeded, we perform the reverse operation */ - if (local->ret_cache[conf->subvolume_cnt] == 0) { - /* count errant subvols in last field of ret_cache */ - for (i = 0; i < conf->subvolume_cnt; i++) { - if (local->ret_cache[i] != 0) - ++local->ret_cache[conf->subvolume_cnt]; - } - if (local->ret_cache[conf->subvolume_cnt]) { - /* undoing the damage: - * for all subvolumes, where rename - * succeeded, we perform the reverse operation - */ - for (i = 0; i < conf->subvolume_cnt; i++) { - if (local->ret_cache[i] == 0) - ++local->call_cnt; - } - for (i = 0; i < conf->subvolume_cnt; i++) { - if (local->ret_cache[i]) - continue; - - STACK_WIND (frame, - dht_rename_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->rename, - &local->loc2, &local->loc, - NULL); - } - - return 0; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i] == 0) + ++local->call_cnt; } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i]) + continue; - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); + STACK_WIND(frame, dht_rename_dir_cbk, conf->subvolumes[i], + conf->subvolumes[i]->fops->rename, &local->loc2, + &local->loc, NULL); + } - dht_rename_dir_unlock (frame, this); + return 0; + } } - return 0; -} + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); + dht_rename_dir_unlock(frame, this); + } + + return 0; +} int -dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf, - struct iatt *preoldparent, - struct iatt *postoldparent, - struct iatt *prenewparent, - struct iatt *postnewparent, dict_t *xdata) +dht_rename_hashed_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - int call_cnt = 0; - xlator_t *prev = NULL; - int i = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - conf = this->private; - local = frame->local; - prev = cookie; - - - if (op_ret == -1) { - gf_uuid_unparse(local->loc.inode->gfid, gfid); - - gf_msg (this->name, GF_LOG_INFO, op_errno, - DHT_MSG_RENAME_FAILED, - "rename %s -> %s on %s failed, (gfid = %s) ", - local->loc.path, local->loc2.path, - prev->name, gfid); - - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } - /* TODO: construct proper stbuf for dir */ - /* - * FIXME: is this the correct way to build stbuf and - * parent bufs? 
- */ - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->preoldparent, preoldparent); - dht_iatt_merge (this, &local->postoldparent, postoldparent); - dht_iatt_merge (this, &local->preparent, prenewparent); - dht_iatt_merge (this, &local->postparent, postnewparent); - - call_cnt = local->call_cnt = conf->subvolume_cnt - 1; - - if (!local->call_cnt) - goto unwind; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == local->dst_hashed) - continue; - STACK_WIND_COOKIE (frame, dht_rename_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->rename, - &local->loc, &local->loc2, NULL); - if (!--call_cnt) - break; - } - + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int call_cnt = 0; + xlator_t *prev = NULL; + int i = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + conf = this->private; + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_uuid_unparse(local->loc.inode->gfid, gfid); - return 0; + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED, + "rename %s -> %s on %s failed, (gfid = %s) ", local->loc.path, + local->loc2.path, prev->name, gfid); + + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + /* TODO: construct proper stbuf for dir */ + /* + * FIXME: is this the correct way to build stbuf and + * parent bufs? + */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preoldparent, preoldparent); + dht_iatt_merge(this, &local->postoldparent, postoldparent); + dht_iatt_merge(this, &local->preparent, prenewparent); + dht_iatt_merge(this, &local->postparent, postnewparent); + + call_cnt = local->call_cnt = conf->subvolume_cnt - 1; + + if (!local->call_cnt) + goto unwind; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == local->dst_hashed) + continue; + STACK_WIND_COOKIE( + frame, dht_rename_dir_cbk, conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->rename, &local->loc, &local->loc2, NULL); + if (!--call_cnt) + break; + } + + return 0; unwind: - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); - dht_rename_dir_unlock (frame, this); - return 0; + dht_rename_dir_unlock(frame, this); + return 0; } - int -dht_rename_dir_do (call_frame_t *frame, xlator_t *this) +dht_rename_dir_do(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret == -1) - goto err; + if (local->op_ret == -1) + goto err; - local->op_ret = 0; + local->op_ret = 0; - STACK_WIND_COOKIE (frame, dht_rename_hashed_dir_cbk, local->dst_hashed, - local->dst_hashed, - local->dst_hashed->fops->rename, - &local->loc, &local->loc2, NULL); - return 0; + STACK_WIND_COOKIE(frame, dht_rename_hashed_dir_cbk, local->dst_hashed, + local->dst_hashed, local->dst_hashed->fops->rename, + &local->loc, &local->loc2, NULL); + return 0; err: - dht_rename_dir_unlock (frame, this); - return 0; + dht_rename_dir_unlock(frame, this); + return 0; } - int -dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, - dict_t *xdata) +dht_rename_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) { - dht_local_t *local = 
NULL; - int this_call_cnt = -1; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - if (op_ret > 2) { - gf_msg_trace (this->name, 0, - "readdir on %s for %s returned %d entries", - prev->name, local->loc.path, op_ret); - local->op_ret = -1; - local->op_errno = ENOTEMPTY; - } + dht_local_t *local = NULL; + int this_call_cnt = -1; + xlator_t *prev = NULL; - this_call_cnt = dht_frame_return (frame); + local = frame->local; + prev = cookie; - if (is_last_call (this_call_cnt)) { - dht_rename_dir_do (frame, this); - } + if (op_ret > 2) { + gf_msg_trace(this->name, 0, "readdir on %s for %s returned %d entries", + prev->name, local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno = ENOTEMPTY; + } - return 0; -} + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + dht_rename_dir_do(frame, this); + } + + return 0; +} int -dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +dht_rename_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = -1; - xlator_t *prev = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - prev = cookie; - + dht_local_t *local = NULL; + int this_call_cnt = -1; + xlator_t *prev = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - if (op_ret == -1) { + local = frame->local; + prev = cookie; - gf_uuid_unparse(local->loc.inode->gfid, gfid); - gf_msg (this->name, GF_LOG_INFO, op_errno, - DHT_MSG_OPENDIR_FAILED, - "opendir on %s for %s failed,(gfid = %s) ", - prev->name, local->loc.path, gfid); - goto err; - } + if (op_ret == -1) { + gf_uuid_unparse(local->loc.inode->gfid, gfid); + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_OPENDIR_FAILED, + "opendir on %s for %s failed,(gfid = %s) ", prev->name, + local->loc.path, gfid); + goto err; + } - fd_bind (fd); - STACK_WIND_COOKIE (frame, dht_rename_readdir_cbk, prev, prev, - prev->fops->readdir, local->fd, 4096, 0, NULL); + fd_bind(fd); + STACK_WIND_COOKIE(frame, dht_rename_readdir_cbk, prev, prev, + prev->fops->readdir, local->fd, 4096, 0, NULL); - return 0; + return 0; err: - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt)) { - dht_rename_dir_do (frame, this); - } + if (is_last_call(this_call_cnt)) { + dht_rename_dir_do(frame, this); + } - return 0; + return 0; } int -dht_rename_dir_lock2_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +dht_rename_dir_lock2_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char src_gfid[GF_UUID_BUF_SIZE] = {0}; - char dst_gfid[GF_UUID_BUF_SIZE] = {0}; - dht_conf_t *conf = NULL; - int i = 0; - - local = frame->local; - conf = this->private; - - if (op_ret < 0) { - uuid_utoa_r (local->loc.inode->gfid, src_gfid); - - if (local->loc2.inode) - uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); - - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_INODE_LK_ERROR, - "acquiring entrylk after inodelk failed" - "rename (%s:%s:%s %s:%s:%s)", - local->loc.path, src_gfid, local->src_cached->name, - local->loc2.path, dst_gfid, - local->dst_cached ? 
local->dst_cached->name : NULL); - - local->op_ret = -1; - local->op_errno = op_errno; - goto err; - } - - local->fd = fd_create (local->loc.inode, frame->root->pid); - if (!local->fd) { - op_errno = ENOMEM; - goto err; - } - - local->op_ret = 0; + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + dht_conf_t *conf = NULL; + int i = 0; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "acquiring entrylk after inodelk failed" + "rename (%s:%s:%s %s:%s:%s)", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } + + local->fd = fd_create(local->loc.inode, frame->root->pid); + if (!local->fd) { + op_errno = ENOMEM; + goto err; + } + + local->op_ret = 0; + + if (!local->dst_cached) { + dht_rename_dir_do(frame, this); + return 0; + } - if (!local->dst_cached) { - dht_rename_dir_do (frame, this); - return 0; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_rename_opendir_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, &local->loc2, + local->fd, NULL); + } - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_rename_opendir_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->opendir, - &local->loc2, local->fd, NULL); - } - - return 0; + return 0; err: - /* No harm in calling an extra unlock */ - dht_rename_dir_unlock (frame, this); - return 0; + /* No harm in calling an extra unlock */ + dht_rename_dir_unlock(frame, this); + return 0; } int -dht_rename_dir_lock1_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_rename_dir_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char src_gfid[GF_UUID_BUF_SIZE] = {0}; - char dst_gfid[GF_UUID_BUF_SIZE] = {0}; - int ret = 0; - loc_t *loc = NULL; - xlator_t *subvol = NULL; - - local = frame->local; - - if (op_ret < 0) { - uuid_utoa_r (local->loc.inode->gfid, src_gfid); - - if (local->loc2.inode) - uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); - - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_INODE_LK_ERROR, - "acquiring entrylk after inodelk failed" - "rename (%s:%s:%s %s:%s:%s)", - local->loc.path, src_gfid, local->src_cached->name, - local->loc2.path, dst_gfid, - local->dst_cached ? 
local->dst_cached->name : NULL); - - local->op_ret = -1; - local->op_errno = op_errno; - goto err; - } - - if (local->current == &local->lock[0]) { - loc = &local->loc2; - subvol = local->dst_hashed; - local->current = &local->lock[1]; - } else { - loc = &local->loc; - subvol = local->src_hashed; - local->current = &local->lock[0]; - } - ret = dht_protect_namespace (frame, loc, subvol, &local->current->ns, - dht_rename_dir_lock2_cbk); - if (ret < 0) { - op_errno = EINVAL; - goto err; - } - - return 0; + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + loc_t *loc = NULL; + xlator_t *subvol = NULL; + + local = frame->local; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "acquiring entrylk after inodelk failed" + "rename (%s:%s:%s %s:%s:%s)", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } + + if (local->current == &local->lock[0]) { + loc = &local->loc2; + subvol = local->dst_hashed; + local->current = &local->lock[1]; + } else { + loc = &local->loc; + subvol = local->src_hashed; + local->current = &local->lock[0]; + } + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_dir_lock2_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + return 0; err: - /* No harm in calling an extra unlock */ - dht_rename_dir_unlock (frame, this); - return 0; + /* No harm in calling an extra unlock */ + dht_rename_dir_unlock(frame, this); + return 0; } - /* * If the hashed subvolumes of both source and dst are the different, * lock in dictionary order of hashed subvol->name. 
This is important @@ -506,1566 +473,1471 @@ err: * */ static void -dht_order_rename_lock (call_frame_t *frame, loc_t **loc, xlator_t **subvol) +dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol) { - int ret = 0; - dht_local_t *local = NULL; - char src[GF_UUID_BNAME_BUF_SIZE] = {0}; - char dst[GF_UUID_BNAME_BUF_SIZE] = {0}; - - - local = frame->local; - - if (local->src_hashed->name == local->dst_hashed->name) { - ret = 0; - } else { - ret = strcmp (local->src_hashed->name, local->dst_hashed->name); - } - - if (ret == 0) { - - /* hashed subvols are the same for src and dst */ - /* Entrylks need to be ordered*/ - if (local->loc.pargfid) - uuid_utoa_r (local->loc.pargfid, src); - else if (local->loc.parent) - uuid_utoa_r (local->loc.parent->gfid, src); - - strcat (src, local->loc.name); - - if (local->loc2.pargfid) - uuid_utoa_r (local->loc2.pargfid, dst); - else if (local->loc2.parent) - uuid_utoa_r (local->loc2.parent->gfid, dst); - - strcat (dst, local->loc2.name); - ret = strcmp (src, dst); - } - - if (ret <= 0) { - /*inodelk in dictionary order of hashed subvol names*/ - /*entrylk in dictionary order of gfid/basename */ - local->current = &local->lock[0]; - *loc = &local->loc; - *subvol = local->src_hashed; - - } else { - local->current = &local->lock[1]; - *loc = &local->loc2; - *subvol = local->dst_hashed; - } - - return; + int ret = 0; + dht_local_t *local = NULL; + char src[GF_UUID_BNAME_BUF_SIZE] = {0}; + char dst[GF_UUID_BNAME_BUF_SIZE] = {0}; + + local = frame->local; + + if (local->src_hashed->name == local->dst_hashed->name) { + ret = 0; + } else { + ret = strcmp(local->src_hashed->name, local->dst_hashed->name); + } + + if (ret == 0) { + /* hashed subvols are the same for src and dst */ + /* Entrylks need to be ordered*/ + if (local->loc.pargfid) + uuid_utoa_r(local->loc.pargfid, src); + else if (local->loc.parent) + uuid_utoa_r(local->loc.parent->gfid, src); + + strcat(src, local->loc.name); + + if (local->loc2.pargfid) + uuid_utoa_r(local->loc2.pargfid, dst); + else if (local->loc2.parent) + uuid_utoa_r(local->loc2.parent->gfid, dst); + + strcat(dst, local->loc2.name); + ret = strcmp(src, dst); + } + + if (ret <= 0) { + /*inodelk in dictionary order of hashed subvol names*/ + /*entrylk in dictionary order of gfid/basename */ + local->current = &local->lock[0]; + *loc = &local->loc; + *subvol = local->src_hashed; + + } else { + local->current = &local->lock[1]; + *loc = &local->loc2; + *subvol = local->dst_hashed; + } + + return; } int -dht_rename_dir (call_frame_t *frame, xlator_t *this) +dht_rename_dir(call_frame_t *frame, xlator_t *this) { - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - loc_t *loc = NULL; - xlator_t *subvol = NULL; - int i = 0; - int ret = 0; - int op_errno = -1; - - conf = frame->this->private; - local = frame->local; - - local->ret_cache = GF_CALLOC (conf->subvolume_cnt + 1, sizeof (int), - gf_dht_ret_cache_t); - - if (local->ret_cache == NULL) { - op_errno = ENOMEM; - goto err; - } - - local->call_cnt = conf->subvolume_cnt; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->subvolume_status[i]) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_RENAME_FAILED, - "Rename dir failed: subvolume down (%s)", - conf->subvolumes[i]->name); - op_errno = ENOTCONN; - goto err; - } - } - - - /* Locks on src and dst needs to ordered which otherwise might cause - * deadlocks when rename (src, dst) and rename (dst, src) is done from - * two different clients - */ - dht_order_rename_lock (frame, &loc, &subvol); - - /* Rename 
must take locks on src to avoid lookup selfheal from - * recreating src on those subvols where the rename was successful. - * The locks can't be issued parallel as two different clients might - * attempt same rename command and be in dead lock. - */ - ret = dht_protect_namespace (frame, loc, subvol, - &local->current->ns, - dht_rename_dir_lock1_cbk); - if (ret < 0) { - op_errno = EINVAL; - goto err; - } - - return 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + xlator_t *subvol = NULL; + int i = 0; + int ret = 0; + int op_errno = -1; + + conf = frame->this->private; + local = frame->local; + + local->ret_cache = GF_CALLOC(conf->subvolume_cnt + 1, sizeof(int), + gf_dht_ret_cache_t); + + if (local->ret_cache == NULL) { + op_errno = ENOMEM; + goto err; + } + + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "Rename dir failed: subvolume down (%s)", + conf->subvolumes[i]->name); + op_errno = ENOTCONN; + goto err; + } + } + + /* Locks on src and dst needs to ordered which otherwise might cause + * deadlocks when rename (src, dst) and rename (dst, src) is done from + * two different clients + */ + dht_order_rename_lock(frame, &loc, &subvol); + + /* Rename must take locks on src to avoid lookup selfheal from + * recreating src on those subvols where the rename was successful. + * The locks can't be issued parallel as two different clients might + * attempt same rename command and be in dead lock. + */ + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_dir_lock1_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; } static int -dht_rename_track_for_changelog (xlator_t *this, dict_t *xattr, - loc_t *oldloc, loc_t *newloc) +dht_rename_track_for_changelog(xlator_t *this, dict_t *xattr, loc_t *oldloc, + loc_t *newloc) { - int ret = -1; - dht_changelog_rename_info_t *info = NULL; - char *name = NULL; - int len1 = 0; - int len2 = 0; - int size = 0; - - if (!xattr || !oldloc || !newloc || !this) - return ret; - - len1 = strlen (oldloc->name) + 1; - len2 = strlen (newloc->name) + 1; - size = sizeof (dht_changelog_rename_info_t) + len1 + len2; - - info = GF_CALLOC (size, sizeof(char), gf_common_mt_char); - if (!info) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to calloc memory"); - return ret; - } + int ret = -1; + dht_changelog_rename_info_t *info = NULL; + char *name = NULL; + int len1 = 0; + int len2 = 0; + int size = 0; + + if (!xattr || !oldloc || !newloc || !this) + return ret; - gf_uuid_copy (info->old_pargfid, oldloc->pargfid); - gf_uuid_copy (info->new_pargfid, newloc->pargfid); - - info->oldname_len = len1; - info->newname_len = len2; - strncpy (info->buffer, oldloc->name, len1); - name = info->buffer + len1; - strncpy (name, newloc->name, len2); - - ret = dict_set_bin (xattr, DHT_CHANGELOG_RENAME_OP_KEY, - info, size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value: key = %s," - " path = %s", DHT_CHANGELOG_RENAME_OP_KEY, - oldloc->name); - GF_FREE (info); - } + len1 = strlen(oldloc->name) + 1; + len2 = strlen(newloc->name) + 1; + size = sizeof(dht_changelog_rename_info_t) + len1 + len2; + info = GF_CALLOC(size, sizeof(char), gf_common_mt_char); + if (!info) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to calloc memory"); return ret; + } + + gf_uuid_copy(info->old_pargfid, oldloc->pargfid); + gf_uuid_copy(info->new_pargfid, newloc->pargfid); + + info->oldname_len = len1; + info->newname_len = len2; + strncpy(info->buffer, oldloc->name, len1); + name = info->buffer + len1; + strncpy(name, newloc->name, len2); + + ret = dict_set_bin(xattr, DHT_CHANGELOG_RENAME_OP_KEY, info, size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s," + " path = %s", + DHT_CHANGELOG_RENAME_OP_KEY, oldloc->name); + GF_FREE(info); + } + + return ret; } - - -#define DHT_MARKER_DONT_ACCOUNT(xattr) do { \ - int tmp = -1; \ - if (!xattr) { \ - xattr = dict_new (); \ - if (!xattr) \ - break; \ - } \ - tmp = dict_set_str (xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, \ - "yes"); \ - if (tmp) { \ - gf_msg (this->name, GF_LOG_ERROR, 0, \ - DHT_MSG_DICT_SET_FAILED, \ - "Failed to set dictionary value: key = %s," \ - " path = %s",GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, \ - local->loc.path); \ - } \ - }while (0) - - -#define DHT_CHANGELOG_TRACK_AS_RENAME(xattr, oldloc, newloc) do { \ - int tmp = -1; \ - if (!xattr) { \ - xattr = dict_new (); \ - if (!xattr) { \ - gf_msg (this->name, GF_LOG_ERROR, 0, \ - DHT_MSG_DICT_SET_FAILED, \ - "Failed to create dictionary to " \ - "track rename"); \ - break; \ - } \ - } \ - \ - tmp = dht_rename_track_for_changelog (this, xattr, \ - oldloc, newloc); \ - \ - if (tmp) { \ - gf_msg (this->name, GF_LOG_ERROR, 0, \ - DHT_MSG_DICT_SET_FAILED, \ - "Failed to set dictionary value: key = %s," \ - " path = %s", DHT_CHANGELOG_RENAME_OP_KEY, \ - (oldloc)->path); \ - } \ - } while (0) - +#define 
DHT_MARKER_DONT_ACCOUNT(xattr) \ + do { \ + int tmp = -1; \ + if (!xattr) { \ + xattr = dict_new(); \ + if (!xattr) \ + break; \ + } \ + tmp = dict_set_str(xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, "yes"); \ + if (tmp) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \ + "Failed to set dictionary value: key = %s," \ + " path = %s", \ + GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, local->loc.path); \ + } \ + } while (0) + +#define DHT_CHANGELOG_TRACK_AS_RENAME(xattr, oldloc, newloc) \ + do { \ + int tmp = -1; \ + if (!xattr) { \ + xattr = dict_new(); \ + if (!xattr) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \ + "Failed to create dictionary to " \ + "track rename"); \ + break; \ + } \ + } \ + \ + tmp = dht_rename_track_for_changelog(this, xattr, oldloc, newloc); \ + \ + if (tmp) { \ + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \ + "Failed to set dictionary value: key = %s," \ + " path = %s", \ + DHT_CHANGELOG_RENAME_OP_KEY, (oldloc)->path); \ + } \ + } while (0) int -dht_rename_unlock (call_frame_t *frame, xlator_t *this) +dht_rename_unlock(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - int op_ret = -1; - char src_gfid[GF_UUID_BUF_SIZE] = {0}; - char dst_gfid[GF_UUID_BUF_SIZE] = {0}; - dht_ilock_wrap_t inodelk_wrapper = {0, }; - - local = frame->local; - inodelk_wrapper.locks = local->rename_inodelk_backward_compatible; - inodelk_wrapper.lk_count = local->rename_inodelk_bc_count; - - op_ret = dht_unlock_inodelk_wrapper (frame, &inodelk_wrapper); - if (op_ret < 0) { - uuid_utoa_r (local->loc.inode->gfid, src_gfid); - - if (local->loc2.inode) - uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); - - if (IA_ISREG (local->stbuf.ia_type)) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "winding unlock inodelk failed " - "rename (%s:%s:%s %s:%s:%s), " - "stale locks left on bricks", - local->loc.path, src_gfid, - local->src_cached->name, - local->loc2.path, dst_gfid, - local->dst_cached ? - local->dst_cached->name : NULL); - else - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_UNLOCKING_FAILED, - "winding unlock inodelk failed " - "rename (%s:%s %s:%s), " - "stale locks left on bricks", - local->loc.path, src_gfid, - local->loc2.path, dst_gfid); - } - - dht_unlock_namespace (frame, &local->lock[0]); - dht_unlock_namespace (frame, &local->lock[1]); - - dht_rename_unlock_cbk (frame, NULL, this, local->op_ret, - local->op_errno, NULL); - return 0; + dht_local_t *local = NULL; + int op_ret = -1; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + dht_ilock_wrap_t inodelk_wrapper = { + 0, + }; + + local = frame->local; + inodelk_wrapper.locks = local->rename_inodelk_backward_compatible; + inodelk_wrapper.lk_count = local->rename_inodelk_bc_count; + + op_ret = dht_unlock_inodelk_wrapper(frame, &inodelk_wrapper); + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + if (IA_ISREG(local->stbuf.ia_type)) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s:%s %s:%s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? 
local->dst_cached->name : NULL); + else + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, + "winding unlock inodelk failed " + "rename (%s:%s %s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->loc2.path, dst_gfid); + } + + dht_unlock_namespace(frame, &local->lock[0]); + dht_unlock_namespace(frame, &local->lock[1]); + + dht_rename_unlock_cbk(frame, NULL, this, local->op_ret, local->op_errno, + NULL); + return 0; } int -dht_rename_done (call_frame_t *frame, xlator_t *this) +dht_rename_done(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->linked == _gf_true) { - local->linked = _gf_false; - dht_linkfile_attr_heal (frame, this); - } + if (local->linked == _gf_true) { + local->linked = _gf_false; + dht_linkfile_attr_heal(frame, this); + } - dht_rename_unlock (frame, this); - return 0; + dht_rename_unlock(frame, this); + return 0; } int -dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +dht_rename_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - int this_call_cnt = 0; - - local = frame->local; - prev = cookie; - - FRAME_SU_UNDO (frame, dht_local_t); - if (!local) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_VALUE, - "!local, should not happen"); - goto out; - } + dht_local_t *local = NULL; + xlator_t *prev = NULL; + int this_call_cnt = 0; - this_call_cnt = dht_frame_return (frame); + local = frame->local; + prev = cookie; - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_UNLINK_FAILED, - "%s: Rename: unlink on %s failed ", - local->loc.path, prev->name); - } + FRAME_SU_UNDO(frame, dht_local_t); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_VALUE, + "!local, should not happen"); + goto out; + } - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt)) { - dht_rename_done (frame, this); - } + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLINK_FAILED, + "%s: Rename: unlink on %s failed ", local->loc.path, prev->name); + } + + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); + + if (is_last_call(this_call_cnt)) { + dht_rename_done(frame, this); + } out: - return 0; + return 0; } - int -dht_rename_cleanup (call_frame_t *frame) +dht_rename_cleanup(call_frame_t *frame) { - dht_local_t *local = NULL; - xlator_t *this = NULL; - xlator_t *src_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *dst_cached = NULL; - int call_cnt = 0; - dict_t *xattr = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - this = frame->this; - - src_hashed = local->src_hashed; - src_cached = local->src_cached; - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; - - if (src_cached == dst_cached) - goto nolinks; - - if (local->linked && (dst_hashed != src_hashed) && - (dst_hashed != src_cached)) { - call_cnt++; - } + dht_local_t *local = NULL; + xlator_t *this = NULL; + xlator_t *src_hashed = NULL; + xlator_t 
*src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + int call_cnt = 0; + dict_t *xattr = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - if (local->added_link && (src_cached != dst_hashed)) { - call_cnt++; - } + local = frame->local; + this = frame->this; - local->call_cnt = call_cnt; + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; - if (!call_cnt) - goto nolinks; + if (src_cached == dst_cached) + goto nolinks; - DHT_MARK_FOP_INTERNAL (xattr); + if (local->linked && (dst_hashed != src_hashed) && + (dst_hashed != src_cached)) { + call_cnt++; + } - gf_uuid_unparse(local->loc.inode->gfid, gfid); + if (local->added_link && (src_cached != dst_hashed)) { + call_cnt++; + } - if (local->linked && (dst_hashed != src_hashed) && - (dst_hashed != src_cached)) { - dict_t *xattr_new = NULL; + local->call_cnt = call_cnt; - gf_msg_trace (this->name, 0, - "unlinking linkfile %s @ %s => %s, (gfid = %s)", - local->loc.path, dst_hashed->name, - src_cached->name, gfid); + if (!call_cnt) + goto nolinks; - xattr_new = dict_copy_with_ref (xattr, NULL); + DHT_MARK_FOP_INTERNAL(xattr); + gf_uuid_unparse(local->loc.inode->gfid, gfid); - DHT_MARKER_DONT_ACCOUNT(xattr_new); + if (local->linked && (dst_hashed != src_hashed) && + (dst_hashed != src_cached)) { + dict_t *xattr_new = NULL; - FRAME_SU_DO (frame, dht_local_t); - STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, dst_hashed, - dst_hashed, dst_hashed->fops->unlink, - &local->loc, 0, xattr_new); + gf_msg_trace(this->name, 0, + "unlinking linkfile %s @ %s => %s, (gfid = %s)", + local->loc.path, dst_hashed->name, src_cached->name, gfid); - dict_unref (xattr_new); - xattr_new = NULL; - } + xattr_new = dict_copy_with_ref(xattr, NULL); - if (local->added_link && (src_cached != dst_hashed)) { - dict_t *xattr_new = NULL; + DHT_MARKER_DONT_ACCOUNT(xattr_new); - gf_msg_trace (this->name, 0, - "unlinking link %s => %s (%s), (gfid = %s)", - local->loc.path, local->loc2.path, - src_cached->name, gfid); + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_hashed, dst_hashed, + dst_hashed->fops->unlink, &local->loc, 0, xattr_new); - xattr_new = dict_copy_with_ref (xattr, NULL); + dict_unref(xattr_new); + xattr_new = NULL; + } - if (gf_uuid_compare (local->loc.pargfid, - local->loc2.pargfid) == 0) { - DHT_MARKER_DONT_ACCOUNT(xattr_new); - } - /* * - * The link to file is created using root permission. - * Hence deletion should happen using root. Otherwise - * it will fail. - */ - FRAME_SU_DO (frame, dht_local_t); - STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, src_cached, - src_cached, src_cached->fops->unlink, - &local->loc2, 0, xattr_new); + if (local->added_link && (src_cached != dst_hashed)) { + dict_t *xattr_new = NULL; + + gf_msg_trace(this->name, 0, "unlinking link %s => %s (%s), (gfid = %s)", + local->loc.path, local->loc2.path, src_cached->name, gfid); - dict_unref (xattr_new); - xattr_new = NULL; + xattr_new = dict_copy_with_ref(xattr, NULL); + + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr_new); } + /* * + * The link to file is created using root permission. + * Hence deletion should happen using root. Otherwise + * it will fail. 
+ */ + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached, + src_cached->fops->unlink, &local->loc2, 0, xattr_new); - if (xattr) - dict_unref (xattr); + dict_unref(xattr_new); + xattr_new = NULL; + } - return 0; + if (xattr) + dict_unref(xattr); + + return 0; nolinks: - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); - dht_rename_unlock (frame, this); - return 0; + dht_rename_unlock(frame, this); + return 0; } - int -dht_rename_unlink (call_frame_t *frame, xlator_t *this) +dht_rename_unlink(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - xlator_t *src_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *dst_cached = NULL; - xlator_t *rename_subvol = NULL; - dict_t *xattr = NULL; - - local = frame->local; - - src_hashed = local->src_hashed; - src_cached = local->src_cached; - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; - - local->call_cnt = 0; - - /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk - * is called. since rename has already happened on rename_subvol, - * unlink shouldn't be sent for oldpath (either linkfile or cached-file) - * on rename_subvol. */ - if (src_cached == dst_cached) - rename_subvol = src_cached; - else - rename_subvol = dst_hashed; + dht_local_t *local = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + xlator_t *rename_subvol = NULL; + dict_t *xattr = NULL; - /* TODO: delete files in background */ + local = frame->local; - if (src_cached != dst_hashed && src_cached != dst_cached) - local->call_cnt++; + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; - if (src_hashed != rename_subvol && src_hashed != src_cached) - local->call_cnt++; + local->call_cnt = 0; - if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) - local->call_cnt++; + /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk + * is called. since rename has already happened on rename_subvol, + * unlink shouldn't be sent for oldpath (either linkfile or cached-file) + * on rename_subvol. 
*/ + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; - if (local->call_cnt == 0) - goto unwind; + /* TODO: delete files in background */ - DHT_MARK_FOP_INTERNAL (xattr); + if (src_cached != dst_hashed && src_cached != dst_cached) + local->call_cnt++; - if (src_cached != dst_hashed && src_cached != dst_cached) { - dict_t *xattr_new = NULL; + if (src_hashed != rename_subvol && src_hashed != src_cached) + local->call_cnt++; - xattr_new = dict_copy_with_ref (xattr, NULL); + if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) + local->call_cnt++; - gf_msg_trace (this->name, 0, - "deleting old src datafile %s @ %s", - local->loc.path, src_cached->name); + if (local->call_cnt == 0) + goto unwind; - if (gf_uuid_compare (local->loc.pargfid, - local->loc2.pargfid) == 0) { - DHT_MARKER_DONT_ACCOUNT(xattr_new); - } + DHT_MARK_FOP_INTERNAL(xattr); + + if (src_cached != dst_hashed && src_cached != dst_cached) { + dict_t *xattr_new = NULL; - DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, - &local->loc2); - STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, src_cached, - src_cached, src_cached->fops->unlink, - &local->loc, 0, xattr_new); + xattr_new = dict_copy_with_ref(xattr, NULL); - dict_unref (xattr_new); - xattr_new = NULL; + gf_msg_trace(this->name, 0, "deleting old src datafile %s @ %s", + local->loc.path, src_cached->name); + + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr_new); } - if (src_hashed != rename_subvol && src_hashed != src_cached) { - dict_t *xattr_new = NULL; + DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, &local->loc2); + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached, + src_cached->fops->unlink, &local->loc, 0, xattr_new); - xattr_new = dict_copy_with_ref (xattr, NULL); + dict_unref(xattr_new); + xattr_new = NULL; + } - gf_msg_trace (this->name, 0, - "deleting old src linkfile %s @ %s", - local->loc.path, src_hashed->name); + if (src_hashed != rename_subvol && src_hashed != src_cached) { + dict_t *xattr_new = NULL; - DHT_MARKER_DONT_ACCOUNT(xattr_new); + xattr_new = dict_copy_with_ref(xattr, NULL); - STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, src_hashed, - src_hashed, src_hashed->fops->unlink, - &local->loc, 0, xattr_new); + gf_msg_trace(this->name, 0, "deleting old src linkfile %s @ %s", + local->loc.path, src_hashed->name); - dict_unref (xattr_new); - xattr_new = NULL; - } + DHT_MARKER_DONT_ACCOUNT(xattr_new); - if (dst_cached && - (dst_cached != dst_hashed) && - (dst_cached != src_cached)) { - gf_msg_trace (this->name, 0, - "deleting old dst datafile %s @ %s", - local->loc2.path, dst_cached->name); + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_hashed, src_hashed, + src_hashed->fops->unlink, &local->loc, 0, xattr_new); - STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, dst_cached, - dst_cached, dst_cached->fops->unlink, - &local->loc2, 0, xattr); - } - if (xattr) - dict_unref (xattr); - return 0; + dict_unref(xattr_new); + xattr_new = NULL; + } + + if (dst_cached && (dst_cached != dst_hashed) && + (dst_cached != src_cached)) { + gf_msg_trace(this->name, 0, "deleting old dst datafile %s @ %s", + local->loc2.path, dst_cached->name); + + STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_cached, dst_cached, + dst_cached->fops->unlink, &local->loc2, 0, xattr); + } + if (xattr) + dict_unref(xattr); + return 0; unwind: - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - 
WIPE (&local->postparent); + WIPE(&local->preoldparent); + WIPE(&local->postoldparent); + WIPE(&local->preparent); + WIPE(&local->postparent); - dht_rename_done (frame, this); + dht_rename_done(frame, this); - return 0; + return 0; } int -dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - xlator_t *prev = NULL; - dht_local_t *local = NULL; - - prev = cookie; - local = frame->local; - - /* TODO: Handle this case in lookup-optimize */ - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_CREATE_LINK_FAILED, - "link/file %s on %s failed", - local->loc.path, prev->name); - } + xlator_t *prev = NULL; + dht_local_t *local = NULL; - if (local->linked == _gf_true) { - local->linked = _gf_false; - dht_linkfile_attr_heal (frame, this); - } + prev = cookie; + local = frame->local; - dht_rename_unlink (frame, this); - return 0; -} + /* TODO: Handle this case in lookup-optimize */ + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_CREATE_LINK_FAILED, + "link/file %s on %s failed", local->loc.path, prev->name); + } + + if (local->linked == _gf_true) { + local->linked = _gf_false; + dht_linkfile_attr_heal(frame, this); + } + dht_rename_unlink(frame, this); + return 0; +} int -dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *dst_cached = NULL; - loc_t link_loc = {0}; - - local = frame->local; - prev = cookie; - - src_cached = local->src_cached; - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; - - if (local->linked == _gf_true) - FRAME_SU_UNDO (frame, dht_local_t); - - /* It is a critical failure iff we fail to rename the cached file - * if the rename of the linkto failed, it is not a critical failure, - * and we do not want to lose the created hard link for the new - * name as that could have been read by other clients. - * - * NOTE: If another client is attempting the same oldname -> newname - * rename, and finds both file names as existing, and are hard links - * to each other, then FUSE would send in an unlink for oldname. In - * this time duration if we treat the linkto as a critical error and - * unlink the newname we created, we would have effectively lost the - * file to rename operations. 
- * - * Repercussions of treating this as a non-critical error is that - * we could leave behind a stale linkto file and/or not create the new - * linkto file, the second case would be rectified by a subsequent - * lookup, the first case by a rebalance, like for all stale linkto - * files */ - - if (op_ret == -1) { - /* Critical failure: unable to rename the cached file */ - if (prev == src_cached) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_RENAME_FAILED, - "%s: Rename on %s failed, (gfid = %s) ", - local->loc.path, prev->name, - local->loc.inode ? - uuid_utoa(local->loc.inode->gfid):""); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto cleanup; - } else { - /* Non-critical failure, unable to rename the linkto - * file - */ - gf_msg (this->name, GF_LOG_INFO, op_errno, - DHT_MSG_RENAME_FAILED, - "%s: Rename (linkto file) on %s failed, " - "(gfid = %s) ", - local->loc.path, prev->name, - local->loc.inode ? - uuid_utoa(local->loc.inode->gfid):""); - } - } - if (xdata) { - if (!local->xattr) - local->xattr = dict_ref (xdata); - else - local->xattr = dict_copy_with_ref (xdata, local->xattr); - } - - /* Merge attrs only from src_cached. In case there of src_cached != - * dst_hashed, this ignores linkfile attrs. */ + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + loc_t link_loc = {0}; + + local = frame->local; + prev = cookie; + + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + + if (local->linked == _gf_true) + FRAME_SU_UNDO(frame, dht_local_t); + + /* It is a critical failure iff we fail to rename the cached file + * if the rename of the linkto failed, it is not a critical failure, + * and we do not want to lose the created hard link for the new + * name as that could have been read by other clients. + * + * NOTE: If another client is attempting the same oldname -> newname + * rename, and finds both file names as existing, and are hard links + * to each other, then FUSE would send in an unlink for oldname. In + * this time duration if we treat the linkto as a critical error and + * unlink the newname we created, we would have effectively lost the + * file to rename operations. 
+ * + * Repercussions of treating this as a non-critical error is that + * we could leave behind a stale linkto file and/or not create the new + * linkto file, the second case would be rectified by a subsequent + * lookup, the first case by a rebalance, like for all stale linkto + * files */ + + if (op_ret == -1) { + /* Critical failure: unable to rename the cached file */ if (prev == src_cached) { - dht_iatt_merge (this, &local->stbuf, stbuf); - dht_iatt_merge (this, &local->preoldparent, preoldparent); - dht_iatt_merge (this, &local->postoldparent, postoldparent); - dht_iatt_merge (this, &local->preparent, prenewparent); - dht_iatt_merge (this, &local->postparent, postnewparent); - } - - /* Create the linkto file for the dst file */ - if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) { - - loc_copy (&link_loc, &local->loc2); - if (link_loc.inode) - inode_unref (link_loc.inode); - link_loc.inode = inode_ref (local->loc.inode); - gf_uuid_copy (local->gfid, local->loc.inode->gfid); - gf_uuid_copy (link_loc.gfid, local->loc.inode->gfid); - - dht_linkfile_create (frame, dht_rename_links_create_cbk, - this, src_cached, dst_hashed, - &link_loc); - return 0; - } - - dht_rename_unlink (frame, this); + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_RENAME_FAILED, + "%s: Rename on %s failed, (gfid = %s) ", local->loc.path, + prev->name, + local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : ""); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto cleanup; + } else { + /* Non-critical failure, unable to rename the linkto + * file + */ + gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED, + "%s: Rename (linkto file) on %s failed, " + "(gfid = %s) ", + local->loc.path, prev->name, + local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : ""); + } + } + if (xdata) { + if (!local->xattr) + local->xattr = dict_ref(xdata); + else + local->xattr = dict_copy_with_ref(xdata, local->xattr); + } + + /* Merge attrs only from src_cached. In case there of src_cached != + * dst_hashed, this ignores linkfile attrs. 
*/ + if (prev == src_cached) { + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->preoldparent, preoldparent); + dht_iatt_merge(this, &local->postoldparent, postoldparent); + dht_iatt_merge(this, &local->preparent, prenewparent); + dht_iatt_merge(this, &local->postparent, postnewparent); + } + + /* Create the linkto file for the dst file */ + if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) { + loc_copy(&link_loc, &local->loc2); + if (link_loc.inode) + inode_unref(link_loc.inode); + link_loc.inode = inode_ref(local->loc.inode); + gf_uuid_copy(local->gfid, local->loc.inode->gfid); + gf_uuid_copy(link_loc.gfid, local->loc.inode->gfid); + + dht_linkfile_create(frame, dht_rename_links_create_cbk, this, + src_cached, dst_hashed, &link_loc); return 0; + } + + dht_rename_unlink(frame, this); + return 0; cleanup: - dht_rename_cleanup (frame); + dht_rename_cleanup(frame); - return 0; + return 0; } - int -dht_do_rename (call_frame_t *frame) +dht_do_rename(call_frame_t *frame) { - dht_local_t *local = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_cached = NULL; - xlator_t *this = NULL; - xlator_t *rename_subvol = NULL; - - local = frame->local; - this = frame->this; - - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; - src_cached = local->src_cached; - - if (src_cached == dst_cached) - rename_subvol = src_cached; - else - rename_subvol = dst_hashed; - - if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) { - DHT_MARKER_DONT_ACCOUNT(local->xattr_req); - } - - if (rename_subvol == src_cached) { - DHT_CHANGELOG_TRACK_AS_RENAME(local->xattr_req, &local->loc, - &local->loc2); - } - - gf_msg_trace (this->name, 0, - "renaming %s => %s (%s)", - local->loc.path, local->loc2.path, rename_subvol->name); - - if (local->linked == _gf_true) - FRAME_SU_DO (frame, dht_local_t); - STACK_WIND_COOKIE (frame, dht_rename_cbk, rename_subvol, rename_subvol, - rename_subvol->fops->rename, &local->loc, - &local->loc2, local->xattr_req); - return 0; + dht_local_t *local = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_cached = NULL; + xlator_t *this = NULL; + xlator_t *rename_subvol = NULL; + + local = frame->local; + this = frame->this; + + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + src_cached = local->src_cached; + + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; + + if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) { + DHT_MARKER_DONT_ACCOUNT(local->xattr_req); + } + + if (rename_subvol == src_cached) { + DHT_CHANGELOG_TRACK_AS_RENAME(local->xattr_req, &local->loc, + &local->loc2); + } + + gf_msg_trace(this->name, 0, "renaming %s => %s (%s)", local->loc.path, + local->loc2.path, rename_subvol->name); + + if (local->linked == _gf_true) + FRAME_SU_DO(frame, dht_local_t); + STACK_WIND_COOKIE(frame, dht_rename_cbk, rename_subvol, rename_subvol, + rename_subvol->fops->rename, &local->loc, &local->loc2, + local->xattr_req); + return 0; } int -dht_rename_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - 
xlator_t *prev = NULL; + dht_local_t *local = NULL; + xlator_t *prev = NULL; - local = frame->local; - prev = cookie; + local = frame->local; + prev = cookie; - if (op_ret == -1) { - gf_msg_debug (this->name, 0, - "link/file on %s failed (%s)", - prev->name, strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - local->added_link = _gf_false; - } else - dht_iatt_merge (this, &local->stbuf, stbuf); + if (op_ret == -1) { + gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name, + strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + local->added_link = _gf_false; + } else + dht_iatt_merge(this, &local->stbuf, stbuf); - if (local->op_ret == -1) - goto cleanup; + if (local->op_ret == -1) + goto cleanup; - dht_do_rename (frame); + dht_do_rename(frame); - return 0; + return 0; cleanup: - dht_rename_cleanup (frame); + dht_rename_cleanup(frame); - return 0; + return 0; } int -dht_rename_linkto_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_rename_linkto_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - xlator_t *src_cached = NULL; - dict_t *xattr = NULL; - - local = frame->local; - DHT_MARK_FOP_INTERNAL (xattr); - prev = cookie; - src_cached = local->src_cached; - - if (op_ret == -1) { - gf_msg_debug (this->name, 0, - "link/file on %s failed (%s)", - prev->name, strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } + dht_local_t *local = NULL; + xlator_t *prev = NULL; + xlator_t *src_cached = NULL; + dict_t *xattr = NULL; - /* If linkto creation failed move to failure cleanup code, - * instead of continuing with creating the link file */ - if (local->op_ret != 0) { - goto cleanup; - } + local = frame->local; + DHT_MARK_FOP_INTERNAL(xattr); + prev = cookie; + src_cached = local->src_cached; - gf_msg_trace (this->name, 0, - "link %s => %s (%s)", local->loc.path, - local->loc2.path, src_cached->name); - if (gf_uuid_compare (local->loc.pargfid, - local->loc2.pargfid) == 0) { - DHT_MARKER_DONT_ACCOUNT(xattr); - } + if (op_ret == -1) { + gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name, + strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } - local->added_link = _gf_true; + /* If linkto creation failed move to failure cleanup code, + * instead of continuing with creating the link file */ + if (local->op_ret != 0) { + goto cleanup; + } - STACK_WIND_COOKIE (frame, dht_rename_link_cbk, src_cached, src_cached, - src_cached->fops->link, &local->loc, &local->loc2, - xattr); + gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path, + local->loc2.path, src_cached->name); + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr); + } - if (xattr) - dict_unref (xattr); + local->added_link = _gf_true; - return 0; + STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached, + src_cached->fops->link, &local->loc, &local->loc2, xattr); + + if (xattr) + dict_unref(xattr); + + return 0; cleanup: - dht_rename_cleanup (frame); + dht_rename_cleanup(frame); - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); - return 0; + return 0; } int 
-dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_rename_unlink_links_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + local = frame->local; + prev = cookie; - local = frame->local; - prev = cookie; + if ((op_ret == -1) && (op_errno != ENOENT)) { + gf_msg_debug(this->name, 0, "unlink of %s on %s failed (%s)", + local->loc2.path, prev->name, strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } - if ((op_ret == -1) && (op_errno != ENOENT)) { - gf_msg_debug (this->name, 0, - "unlink of %s on %s failed (%s)", - local->loc2.path, prev->name, - strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } + if (local->op_ret == -1) + goto cleanup; - if (local->op_ret == -1) - goto cleanup; + dht_do_rename(frame); - dht_do_rename (frame); - - return 0; + return 0; cleanup: - dht_rename_cleanup (frame); + dht_rename_cleanup(frame); - return 0; + return 0; } - int -dht_rename_create_links (call_frame_t *frame) +dht_rename_create_links(call_frame_t *frame) { - dht_local_t *local = NULL; - xlator_t *this = NULL; - xlator_t *src_hashed = NULL; - xlator_t *src_cached = NULL; - xlator_t *dst_hashed = NULL; - xlator_t *dst_cached = NULL; - int call_cnt = 0; - dict_t *xattr = NULL; + dht_local_t *local = NULL; + xlator_t *this = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + int call_cnt = 0; + dict_t *xattr = NULL; + local = frame->local; + this = frame->this; - local = frame->local; - this = frame->this; + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; - src_hashed = local->src_hashed; - src_cached = local->src_cached; - dst_hashed = local->dst_hashed; - dst_cached = local->dst_cached; + DHT_MARK_FOP_INTERNAL(xattr); - DHT_MARK_FOP_INTERNAL (xattr); + if (src_cached == dst_cached) { + dict_t *xattr_new = NULL; - if (src_cached == dst_cached) { - dict_t *xattr_new = NULL; + if (dst_hashed == dst_cached) + goto nolinks; - if (dst_hashed == dst_cached) - goto nolinks; + xattr_new = dict_copy_with_ref(xattr, NULL); - xattr_new = dict_copy_with_ref (xattr, NULL); + gf_msg_trace(this->name, 0, "unlinking dst linkfile %s @ %s", + local->loc2.path, dst_hashed->name); - gf_msg_trace (this->name, 0, - "unlinking dst linkfile %s @ %s", - local->loc2.path, dst_hashed->name); + DHT_MARKER_DONT_ACCOUNT(xattr_new); - DHT_MARKER_DONT_ACCOUNT(xattr_new); + STACK_WIND_COOKIE(frame, dht_rename_unlink_links_cbk, dst_hashed, + dst_hashed, dst_hashed->fops->unlink, &local->loc2, 0, + xattr_new); - STACK_WIND_COOKIE (frame, dht_rename_unlink_links_cbk, - dst_hashed, dst_hashed, - dst_hashed->fops->unlink, &local->loc2, 0, - xattr_new); - - dict_unref (xattr_new); - if (xattr) - dict_unref (xattr); - - return 0; - } + dict_unref(xattr_new); + if (xattr) + dict_unref(xattr); - if (src_cached != dst_hashed) { - /* needed to create the link file */ - call_cnt++; - if (dst_hashed != src_hashed) - /* needed to create the linkto file */ - call_cnt ++; + return 0; + } + + if (src_cached != dst_hashed) { + /* needed to create the link file */ + call_cnt++; + if 
(dst_hashed != src_hashed) + /* needed to create the linkto file */ + call_cnt++; + } + + /* We should not have any failures post the link creation, as this + * introduces the newname into the namespace. Clients could have cached + * the existence of the newname and may start taking actions based on + * the same. Hence create the linkto first, and then attempt the link. + * + * NOTE: If another client is attempting the same oldname -> newname + * rename, and finds both file names as existing, and are hard links + * to each other, then FUSE would send in an unlink for oldname. In + * this time duration if we treat the linkto as a critical error and + * unlink the newname we created, we would have effectively lost the + * file to rename operations. */ + if (dst_hashed != src_hashed && src_cached != dst_hashed) { + gf_msg_trace(this->name, 0, "linkfile %s @ %s => %s", local->loc.path, + dst_hashed->name, src_cached->name); + + memcpy(local->gfid, local->loc.inode->gfid, 16); + dht_linkfile_create(frame, dht_rename_linkto_cbk, this, src_cached, + dst_hashed, &local->loc); + } else if (src_cached != dst_hashed) { + dict_t *xattr_new = NULL; + + xattr_new = dict_copy_with_ref(xattr, NULL); + + gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path, + local->loc2.path, src_cached->name); + if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) { + DHT_MARKER_DONT_ACCOUNT(xattr_new); } - /* We should not have any failures post the link creation, as this - * introduces the newname into the namespace. Clients could have cached - * the existence of the newname and may start taking actions based on - * the same. Hence create the linkto first, and then attempt the link. - * - * NOTE: If another client is attempting the same oldname -> newname - * rename, and finds both file names as existing, and are hard links - * to each other, then FUSE would send in an unlink for oldname. In - * this time duration if we treat the linkto as a critical error and - * unlink the newname we created, we would have effectively lost the - * file to rename operations. 
*/ - if (dst_hashed != src_hashed && src_cached != dst_hashed) { - gf_msg_trace (this->name, 0, - "linkfile %s @ %s => %s", - local->loc.path, dst_hashed->name, - src_cached->name); - - memcpy (local->gfid, local->loc.inode->gfid, 16); - dht_linkfile_create (frame, dht_rename_linkto_cbk, this, - src_cached, dst_hashed, &local->loc); - } else if (src_cached != dst_hashed) { - dict_t *xattr_new = NULL; - - xattr_new = dict_copy_with_ref (xattr, NULL); - - gf_msg_trace (this->name, 0, - "link %s => %s (%s)", local->loc.path, - local->loc2.path, src_cached->name); - if (gf_uuid_compare (local->loc.pargfid, - local->loc2.pargfid) == 0) { - DHT_MARKER_DONT_ACCOUNT(xattr_new); - } + local->added_link = _gf_true; - local->added_link = _gf_true; + STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached, + src_cached->fops->link, &local->loc, &local->loc2, + xattr_new); - STACK_WIND_COOKIE (frame, dht_rename_link_cbk, src_cached, - src_cached, src_cached->fops->link, - &local->loc, &local->loc2, xattr_new); - - dict_unref (xattr_new); - } + dict_unref(xattr_new); + } nolinks: - if (!call_cnt) { - /* skip to next step */ - dht_do_rename (frame); - } - if (xattr) - dict_unref (xattr); - - return 0; + if (!call_cnt) { + /* skip to next step */ + dht_do_rename(frame); + } + if (xattr) + dict_unref(xattr); + + return 0; } int -dht_rename_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) +dht_rename_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) { - dht_local_t *local = NULL; - int call_cnt = 0; - dht_conf_t *conf = NULL; - char gfid_local[GF_UUID_BUF_SIZE] = {0}; - char gfid_server[GF_UUID_BUF_SIZE] = {0}; - int child_index = -1; - gf_boolean_t is_src = _gf_false; - loc_t *loc = NULL; - - - child_index = (long)cookie; - - local = frame->local; - conf = this->private; - - is_src = (child_index == 0); + dht_local_t *local = NULL; + int call_cnt = 0; + dht_conf_t *conf = NULL; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char gfid_server[GF_UUID_BUF_SIZE] = {0}; + int child_index = -1; + gf_boolean_t is_src = _gf_false; + loc_t *loc = NULL; + + child_index = (long)cookie; + + local = frame->local; + conf = this->private; + + is_src = (child_index == 0); + if (is_src) + loc = &local->loc; + else + loc = &local->loc2; + + if (op_ret >= 0) { if (is_src) - loc = &local->loc; - else - loc = &local->loc2; - - if (op_ret >= 0) { - if (is_src) - local->src_cached - = dht_subvol_get_cached (this, - local->loc.inode); - else { - if (loc->inode) - gf_uuid_unparse (loc->inode->gfid, gfid_local); - - gf_msg_debug (this->name, 0, - "dst_cached before lookup: %s, " - "(path:%s)(gfid:%s),", - local->loc2.path, - local->dst_cached - ? local->dst_cached->name : - NULL, - local->dst_cached ? gfid_local : NULL); - - local->dst_cached - = dht_subvol_get_cached (this, - local->loc2_copy.inode); - - gf_uuid_unparse (stbuf->ia_gfid, gfid_local); - - gf_msg_debug (this->name, GF_LOG_WARNING, - "dst_cached after lookup: %s, " - "(path:%s)(gfid:%s)", - local->loc2.path, - local->dst_cached - ? local->dst_cached->name : - NULL, - local->dst_cached ? 
gfid_local : NULL); - - - if ((local->loc2.inode == NULL) - || gf_uuid_compare (stbuf->ia_gfid, - local->loc2.inode->gfid)) { - if (local->loc2.inode != NULL) { - inode_unlink (local->loc2.inode, - local->loc2.parent, - local->loc2.name); - inode_unref (local->loc2.inode); - } - - local->loc2.inode - = inode_link (local->loc2_copy.inode, - local->loc2_copy.parent, - local->loc2_copy.name, - stbuf); - gf_uuid_copy (local->loc2.gfid, - stbuf->ia_gfid); - } + local->src_cached = dht_subvol_get_cached(this, local->loc.inode); + else { + if (loc->inode) + gf_uuid_unparse(loc->inode->gfid, gfid_local); + + gf_msg_debug(this->name, 0, + "dst_cached before lookup: %s, " + "(path:%s)(gfid:%s),", + local->loc2.path, + local->dst_cached ? local->dst_cached->name : NULL, + local->dst_cached ? gfid_local : NULL); + + local->dst_cached = dht_subvol_get_cached(this, + local->loc2_copy.inode); + + gf_uuid_unparse(stbuf->ia_gfid, gfid_local); + + gf_msg_debug(this->name, GF_LOG_WARNING, + "dst_cached after lookup: %s, " + "(path:%s)(gfid:%s)", + local->loc2.path, + local->dst_cached ? local->dst_cached->name : NULL, + local->dst_cached ? gfid_local : NULL); + + if ((local->loc2.inode == NULL) || + gf_uuid_compare(stbuf->ia_gfid, local->loc2.inode->gfid)) { + if (local->loc2.inode != NULL) { + inode_unlink(local->loc2.inode, local->loc2.parent, + local->loc2.name); + inode_unref(local->loc2.inode); } - } - if (op_ret < 0) { - if (is_src) { - /* The meaning of is_linkfile is overloaded here. For locking - * to work properly both rebalance and rename should acquire - * lock on datafile. The reason for sending this lookup is to - * find out whether we've acquired a lock on data file. - * Between the lookup before rename and this rename, the - * file could be migrated by a rebalance process and now this - * file this might be a linkto file. We verify that by sending - * this lookup. However, if this lookup fails we cannot really - * say whether we've acquired lock on a datafile or linkto file. - * So, we act conservatively and _assume_ - * that this is a linkfile and fail the rename operation. 
- */ - local->is_linkfile = _gf_true; - local->op_errno = op_errno; - } else { - if (local->dst_cached) - gf_msg_debug (this->name, op_errno, - "file %s (gfid:%s) was present " - "(hashed-subvol=%s, " - "cached-subvol=%s) before rename," - " but lookup failed", - local->loc2.path, - uuid_utoa (local->loc2.inode->gfid), - local->dst_hashed->name, - local->dst_cached->name); - if (dht_inode_missing (op_errno)) - local->dst_cached = NULL; - } - } else if (is_src && xattr && check_is_linkfile (inode, stbuf, xattr, - conf->link_xattr_name)) { - local->is_linkfile = _gf_true; - /* Found linkto file instead of data file, passdown ENOENT - * based on the above comment */ - local->op_errno = ENOENT; - } - - if (!local->is_linkfile && (op_ret >= 0) && - gf_uuid_compare (loc->gfid, stbuf->ia_gfid)) { - gf_uuid_unparse (loc->gfid, gfid_local); - gf_uuid_unparse (stbuf->ia_gfid, gfid_server); - - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_GFID_MISMATCH, - "path:%s, received a different gfid, local_gfid= %s" - " server_gfid: %s", - local->loc.path, gfid_local, gfid_server); - - /* Will passdown ENOENT anyway since the file we sent on - * rename is replaced with a different file */ - local->op_errno = ENOENT; - /* Since local->is_linkfile is used here to detect failure, - * marking this to true */ - local->is_linkfile = _gf_true; - } - - call_cnt = dht_frame_return (frame); - if (is_last_call (call_cnt)) { - if (local->is_linkfile) { - local->op_ret = -1; - goto fail; - } - - dht_rename_create_links (frame); - } - - return 0; + local->loc2.inode = inode_link(local->loc2_copy.inode, + local->loc2_copy.parent, + local->loc2_copy.name, stbuf); + gf_uuid_copy(local->loc2.gfid, stbuf->ia_gfid); + } + } + } + + if (op_ret < 0) { + if (is_src) { + /* The meaning of is_linkfile is overloaded here. For locking + * to work properly both rebalance and rename should acquire + * lock on datafile. The reason for sending this lookup is to + * find out whether we've acquired a lock on data file. + * Between the lookup before rename and this rename, the + * file could be migrated by a rebalance process and now this + * file this might be a linkto file. We verify that by sending + * this lookup. However, if this lookup fails we cannot really + * say whether we've acquired lock on a datafile or linkto file. + * So, we act conservatively and _assume_ + * that this is a linkfile and fail the rename operation. 
+ */ + local->is_linkfile = _gf_true; + local->op_errno = op_errno; + } else { + if (local->dst_cached) + gf_msg_debug(this->name, op_errno, + "file %s (gfid:%s) was present " + "(hashed-subvol=%s, " + "cached-subvol=%s) before rename," + " but lookup failed", + local->loc2.path, + uuid_utoa(local->loc2.inode->gfid), + local->dst_hashed->name, local->dst_cached->name); + if (dht_inode_missing(op_errno)) + local->dst_cached = NULL; + } + } else if (is_src && xattr && + check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) { + local->is_linkfile = _gf_true; + /* Found linkto file instead of data file, passdown ENOENT + * based on the above comment */ + local->op_errno = ENOENT; + } + + if (!local->is_linkfile && (op_ret >= 0) && + gf_uuid_compare(loc->gfid, stbuf->ia_gfid)) { + gf_uuid_unparse(loc->gfid, gfid_local); + gf_uuid_unparse(stbuf->ia_gfid, gfid_server); + + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH, + "path:%s, received a different gfid, local_gfid= %s" + " server_gfid: %s", + local->loc.path, gfid_local, gfid_server); + + /* Will passdown ENOENT anyway since the file we sent on + * rename is replaced with a different file */ + local->op_errno = ENOENT; + /* Since local->is_linkfile is used here to detect failure, + * marking this to true */ + local->is_linkfile = _gf_true; + } + + call_cnt = dht_frame_return(frame); + if (is_last_call(call_cnt)) { + if (local->is_linkfile) { + local->op_ret = -1; + goto fail; + } + + dht_rename_create_links(frame); + } + + return 0; fail: - dht_rename_unlock (frame, this); - return 0; + dht_rename_unlock(frame, this); + return 0; } int -dht_rename_file_lock1_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_rename_file_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char src_gfid[GF_UUID_BUF_SIZE] = {0}; - char dst_gfid[GF_UUID_BUF_SIZE] = {0}; - int ret = 0; - loc_t *loc = NULL; - xlator_t *subvol = NULL; - - local = frame->local; - - if (op_ret < 0) { - uuid_utoa_r (local->loc.inode->gfid, src_gfid); - - if (local->loc2.inode) - uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); - - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_INODE_LK_ERROR, - "protecting namespace of %s failed" - "rename (%s:%s:%s %s:%s:%s)", - local->current == &local->lock[0] ? local->loc.path - : local->loc2.path, - local->loc.path, src_gfid, local->src_hashed->name, - local->loc2.path, dst_gfid, - local->dst_hashed ? 
local->dst_hashed->name : NULL); - - local->op_ret = -1; - local->op_errno = op_errno; - goto err; - } - - if (local->current == &local->lock[0]) { - loc = &local->loc2; - subvol = local->dst_hashed; - local->current = &local->lock[1]; - } else { - loc = &local->loc; - subvol = local->src_hashed; - local->current = &local->lock[0]; - } - - ret = dht_protect_namespace (frame, loc, subvol, &local->current->ns, - dht_rename_lock_cbk); - if (ret < 0) { - op_errno = EINVAL; - goto err; - } - - return 0; + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + loc_t *loc = NULL; + xlator_t *subvol = NULL; + + local = frame->local; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "protecting namespace of %s failed" + "rename (%s:%s:%s %s:%s:%s)", + local->current == &local->lock[0] ? local->loc.path + : local->loc2.path, + local->loc.path, src_gfid, local->src_hashed->name, + local->loc2.path, dst_gfid, + local->dst_hashed ? local->dst_hashed->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + goto err; + } + + if (local->current == &local->lock[0]) { + loc = &local->loc2; + subvol = local->dst_hashed; + local->current = &local->lock[1]; + } else { + loc = &local->loc; + subvol = local->src_hashed; + local->current = &local->lock[0]; + } + + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_lock_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } + + return 0; err: - /* No harm in calling an extra unlock */ - dht_rename_unlock (frame, this); - return 0; + /* No harm in calling an extra unlock */ + dht_rename_unlock(frame, this); + return 0; } int32_t -dht_rename_file_protect_namespace (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +dht_rename_file_protect_namespace(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char src_gfid[GF_UUID_BUF_SIZE] = {0}; - char dst_gfid[GF_UUID_BUF_SIZE] = {0}; - int ret = 0; - loc_t *loc = NULL; - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + loc_t *loc = NULL; + xlator_t *subvol = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - uuid_utoa_r (local->loc.inode->gfid, src_gfid); + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); - if (local->loc2.inode) - uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_INODE_LK_ERROR, - "acquiring inodelk failed " - "rename (%s:%s:%s %s:%s:%s)", - local->loc.path, src_gfid, local->src_cached->name, - local->loc2.path, dst_gfid, - local->dst_cached ? local->dst_cached->name : NULL); + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "acquiring inodelk failed " + "rename (%s:%s:%s %s:%s:%s)", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? 
local->dst_cached->name : NULL); - local->op_ret = -1; - local->op_errno = op_errno; + local->op_ret = -1; + local->op_errno = op_errno; - goto err; - } + goto err; + } - /* Locks on src and dst needs to ordered which otherwise might cause - * deadlocks when rename (src, dst) and rename (dst, src) is done from - * two different clients - */ - dht_order_rename_lock (frame, &loc, &subvol); + /* Locks on src and dst needs to ordered which otherwise might cause + * deadlocks when rename (src, dst) and rename (dst, src) is done from + * two different clients + */ + dht_order_rename_lock(frame, &loc, &subvol); - ret = dht_protect_namespace (frame, loc, subvol, - &local->current->ns, - dht_rename_file_lock1_cbk); - if (ret < 0) { - op_errno = EINVAL; - goto err; - } + ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns, + dht_rename_file_lock1_cbk); + if (ret < 0) { + op_errno = EINVAL; + goto err; + } - return 0; + return 0; err: - /* Its fine to call unlock even when no locks are acquired, as we check - * for lock->locked before winding a unlock call. - */ - dht_rename_unlock (frame, this); + /* Its fine to call unlock even when no locks are acquired, as we check + * for lock->locked before winding a unlock call. + */ + dht_rename_unlock(frame, this); - return 0; + return 0; } int32_t -dht_rename_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - char src_gfid[GF_UUID_BUF_SIZE] = {0}; - char dst_gfid[GF_UUID_BUF_SIZE] = {0}; - dict_t *xattr_req = NULL; - dht_conf_t *conf = NULL; - int i = 0; - xlator_t *subvol = NULL; - dht_lock_t *lock = NULL; - - local = frame->local; - conf = this->private; - - if (op_ret < 0) { - uuid_utoa_r (local->loc.inode->gfid, src_gfid); - - if (local->loc2.inode) - uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); - - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_INODE_LK_ERROR, - "protecting namespace of %s failed. " - "rename (%s:%s:%s %s:%s:%s)", - local->current == &local->lock[0] ? local->loc.path - : local->loc2.path, - local->loc.path, src_gfid, local->src_hashed->name, - local->loc2.path, dst_gfid, - local->dst_hashed ? local->dst_hashed->name : NULL); - - local->op_ret = -1; - local->op_errno = op_errno; - - goto done; + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + dict_t *xattr_req = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *subvol = NULL; + dht_lock_t *lock = NULL; + + local = frame->local; + conf = this->private; + + if (op_ret < 0) { + uuid_utoa_r(local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r(local->loc2.inode->gfid, dst_gfid); + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR, + "protecting namespace of %s failed. " + "rename (%s:%s:%s %s:%s:%s)", + local->current == &local->lock[0] ? local->loc.path + : local->loc2.path, + local->loc.path, src_gfid, local->src_hashed->name, + local->loc2.path, dst_gfid, + local->dst_hashed ? 
local->dst_hashed->name : NULL); + + local->op_ret = -1; + local->op_errno = op_errno; + + goto done; + } + + xattr_req = dict_new(); + if (xattr_req == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto done; + } + + op_ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256); + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = -op_ret; + goto done; + } + + /* dst_cached might've changed. This normally happens for two reasons: + * 1. rebalance migrated dst + * 2. Another parallel rename was done overwriting dst + * + * Doing a lookup on local->loc2 when dst exists, but is associated + * with a different gfid will result in an ESTALE error. So, do a fresh + * lookup with a new inode on dst-path and handle change of dst-cached + * in the cbk. Also, to identify dst-cached changes we do a lookup on + * "this" rather than the subvol. + */ + loc_copy(&local->loc2_copy, &local->loc2); + inode_unref(local->loc2_copy.inode); + local->loc2_copy.inode = inode_new(local->loc.inode->table); + + /* Why not use local->lock.locks[?].loc for lookup post lock phase + * --------------------------------------------------------------- + * "layout.parent_layout.locks[?].loc" does not have the name and pargfid + * populated. + * Reason: If we had populated the name and pargfid, server might + * resolve to a successful lookup even if there is a file with same name + * with a different gfid(unlink & create) as server does name based + * resolution on first priority. And this can result in operating on a + * different inode entirely. + * + * Now consider a scenario where source file was renamed by some other + * client to a new name just before this lock was granted. So if a + * lookup would be done on local->lock[0].layout.parent_layout.locks[?].loc, + * server will send success even if the entry was renamed (since server will + * do a gfid based resolution). So once a lock is granted, make sure the + * file exists with the name that the client requested with. + * */ + + local->call_cnt = 2; + for (i = 0; i < 2; i++) { + if (i == 0) { + lock = local->rename_inodelk_backward_compatible[0]; + if (gf_uuid_compare(local->loc.gfid, lock->loc.gfid) == 0) + subvol = lock->xl; + else { + lock = local->rename_inodelk_backward_compatible[1]; + subvol = lock->xl; + } + } else { + subvol = this; } - xattr_req = dict_new (); - if (xattr_req == NULL) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto done; - } + STACK_WIND_COOKIE(frame, dht_rename_lookup_cbk, (void *)(long)i, subvol, + subvol->fops->lookup, + (i == 0) ? &local->loc : &local->loc2_copy, + xattr_req); + } - op_ret = dict_set_uint32 (xattr_req, - conf->link_xattr_name, 256); - if (op_ret < 0) { - local->op_ret = -1; - local->op_errno = -op_ret; - goto done; - } - - /* dst_cached might've changed. This normally happens for two reasons: - * 1. rebalance migrated dst - * 2. Another parallel rename was done overwriting dst - * - * Doing a lookup on local->loc2 when dst exists, but is associated - * with a different gfid will result in an ESTALE error. So, do a fresh - * lookup with a new inode on dst-path and handle change of dst-cached - * in the cbk. Also, to identify dst-cached changes we do a lookup on - * "this" rather than the subvol. 
- */ - loc_copy (&local->loc2_copy, &local->loc2); - inode_unref (local->loc2_copy.inode); - local->loc2_copy.inode = inode_new (local->loc.inode->table); - - /* Why not use local->lock.locks[?].loc for lookup post lock phase - * --------------------------------------------------------------- - * "layout.parent_layout.locks[?].loc" does not have the name and pargfid - * populated. - * Reason: If we had populated the name and pargfid, server might - * resolve to a successful lookup even if there is a file with same name - * with a different gfid(unlink & create) as server does name based - * resolution on first priority. And this can result in operating on a - * different inode entirely. - * - * Now consider a scenario where source file was renamed by some other - * client to a new name just before this lock was granted. So if a - * lookup would be done on local->lock[0].layout.parent_layout.locks[?].loc, - * server will send success even if the entry was renamed (since server will - * do a gfid based resolution). So once a lock is granted, make sure the file - * exists with the name that the client requested with. - * */ - - local->call_cnt = 2; - for (i = 0; i < 2; i++) { - if (i == 0) { - lock = local->rename_inodelk_backward_compatible[0]; - if (gf_uuid_compare (local->loc.gfid, - lock->loc.gfid) == 0) - subvol = lock->xl; - else { - lock = local->rename_inodelk_backward_compatible[1]; - subvol = lock->xl; - } - } else { - subvol = this; - } - - STACK_WIND_COOKIE (frame, dht_rename_lookup_cbk, - (void *)(long)i, subvol, - subvol->fops->lookup, - (i == 0) ? &local->loc : &local->loc2_copy, - xattr_req); - } - - dict_unref (xattr_req); - return 0; + dict_unref(xattr_req); + return 0; done: - /* Its fine to call unlock even when no locks are acquired, as we check - * for lock->locked before winding a unlock call. - */ - dht_rename_unlock (frame, this); + /* Its fine to call unlock even when no locks are acquired, as we check + * for lock->locked before winding a unlock call. + */ + dht_rename_unlock(frame, this); - if (xattr_req) - dict_unref (xattr_req); + if (xattr_req) + dict_unref(xattr_req); - return 0; + return 0; } int -dht_rename_lock (call_frame_t *frame) +dht_rename_lock(call_frame_t *frame) { - dht_local_t *local = NULL; - int count = 1, ret = -1; - dht_lock_t **lk_array = NULL; - - local = frame->local; - - if (local->dst_cached) - count++; - - lk_array = GF_CALLOC (count, sizeof (*lk_array), - gf_common_mt_pointer); - if (lk_array == NULL) - goto err; - - lk_array[0] = dht_lock_new (frame->this, local->src_cached, &local->loc, - F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL, - FAIL_ON_ANY_ERROR); - if (lk_array[0] == NULL) - goto err; - - if (local->dst_cached) { - /* dst might be removed by the time inodelk reaches bricks, - * which can result in ESTALE errors. POSIX imposes no - * restriction for dst to be present for renames to be - * successful. So, we'll ignore ESTALE errors. As far as - * synchronization on dst goes, we'll achieve the same by - * holding entrylk on parent directory of dst in the namespace - * of basename(dst). Also, there might not be quorum in cluster - * xlators like EC/disperse on errno, in which case they return - * EIO. For eg., in a disperse (4 + 2), 3 might return success - * and three might return ESTALE. Disperse, having no Quorum - * unwinds inodelk with EIO. So, ignore EIO too. 
- */ - lk_array[1] = dht_lock_new (frame->this, local->dst_cached, - &local->loc2, F_WRLCK, - DHT_FILE_MIGRATE_DOMAIN, NULL, - IGNORE_ENOENT_ESTALE_EIO); - if (lk_array[1] == NULL) - goto err; - } - - local->rename_inodelk_backward_compatible = lk_array; - local->rename_inodelk_bc_count = count; - - /* retaining inodelks for the sake of backward compatibility. Please - * make sure to remove this inodelk once all of 3.10, 3.12 and 3.13 - * reach EOL. Better way of getting synchronization would be to acquire - * entrylks on src and dst parent directories in the namespace of - * basenames of src and dst + dht_local_t *local = NULL; + int count = 1, ret = -1; + dht_lock_t **lk_array = NULL; + + local = frame->local; + + if (local->dst_cached) + count++; + + lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); + if (lk_array == NULL) + goto err; + + lk_array[0] = dht_lock_new(frame->this, local->src_cached, &local->loc, + F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL, + FAIL_ON_ANY_ERROR); + if (lk_array[0] == NULL) + goto err; + + if (local->dst_cached) { + /* dst might be removed by the time inodelk reaches bricks, + * which can result in ESTALE errors. POSIX imposes no + * restriction for dst to be present for renames to be + * successful. So, we'll ignore ESTALE errors. As far as + * synchronization on dst goes, we'll achieve the same by + * holding entrylk on parent directory of dst in the namespace + * of basename(dst). Also, there might not be quorum in cluster + * xlators like EC/disperse on errno, in which case they return + * EIO. For eg., in a disperse (4 + 2), 3 might return success + * and three might return ESTALE. Disperse, having no Quorum + * unwinds inodelk with EIO. So, ignore EIO too. */ - ret = dht_blocking_inodelk (frame, lk_array, count, - dht_rename_file_protect_namespace); - if (ret < 0) { - local->rename_inodelk_backward_compatible = NULL; - local->rename_inodelk_bc_count = 0; - goto err; - } - - return 0; + lk_array[1] = dht_lock_new(frame->this, local->dst_cached, &local->loc2, + F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL, + IGNORE_ENOENT_ESTALE_EIO); + if (lk_array[1] == NULL) + goto err; + } + + local->rename_inodelk_backward_compatible = lk_array; + local->rename_inodelk_bc_count = count; + + /* retaining inodelks for the sake of backward compatibility. Please + * make sure to remove this inodelk once all of 3.10, 3.12 and 3.13 + * reach EOL. 
Better way of getting synchronization would be to acquire + * entrylks on src and dst parent directories in the namespace of + * basenames of src and dst + */ + ret = dht_blocking_inodelk(frame, lk_array, count, + dht_rename_file_protect_namespace); + if (ret < 0) { + local->rename_inodelk_backward_compatible = NULL; + local->rename_inodelk_bc_count = 0; + goto err; + } + + return 0; err: - if (lk_array != NULL) { - int tmp_count = 0, i = 0; + if (lk_array != NULL) { + int tmp_count = 0, i = 0; - for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++); + for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++) + ; - dht_lock_array_free (lk_array, tmp_count); - GF_FREE (lk_array); - } + dht_lock_array_free(lk_array, tmp_count); + GF_FREE(lk_array); + } - return -1; + return -1; } int -dht_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) +dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - xlator_t *src_cached = NULL; - xlator_t *src_hashed = NULL; - xlator_t *dst_cached = NULL; - xlator_t *dst_hashed = NULL; - int op_errno = -1; - int ret = -1; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - char newgfid[GF_UUID_BUF_SIZE] = {0}; - gf_boolean_t free_xdata = _gf_false; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (oldloc, err); - VALIDATE_OR_GOTO (newloc, err); - - conf = this->private; - - if (conf->subvolume_cnt == 1) { - if (!IA_ISDIR (oldloc->inode->ia_type)) { - if (!xdata) { - free_xdata = _gf_true; - } - DHT_CHANGELOG_TRACK_AS_RENAME(xdata, oldloc, newloc); - } - default_rename (frame, this, oldloc, newloc, xdata); - if (free_xdata && xdata) { - dict_unref(xdata); - xdata = NULL; - } - return 0; - } - - gf_uuid_unparse(oldloc->inode->gfid, gfid); - - src_hashed = dht_subvol_get_hashed (this, oldloc); - if (!src_hashed) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_RENAME_FAILED, - "No hashed subvolume in layout for path=%s," - "(gfid = %s)", oldloc->path, gfid); - op_errno = EINVAL; - goto err; + xlator_t *src_cached = NULL; + xlator_t *src_hashed = NULL; + xlator_t *dst_cached = NULL; + xlator_t *dst_hashed = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + char newgfid[GF_UUID_BUF_SIZE] = {0}; + gf_boolean_t free_xdata = _gf_false; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(oldloc, err); + VALIDATE_OR_GOTO(newloc, err); + + conf = this->private; + + if (conf->subvolume_cnt == 1) { + if (!IA_ISDIR(oldloc->inode->ia_type)) { + if (!xdata) { + free_xdata = _gf_true; + } + DHT_CHANGELOG_TRACK_AS_RENAME(xdata, oldloc, newloc); + } + default_rename(frame, this, oldloc, newloc, xdata); + if (free_xdata && xdata) { + dict_unref(xdata); + xdata = NULL; } - - src_cached = dht_subvol_get_cached (this, oldloc->inode); - if (!src_cached) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_RENAME_FAILED, - "No cached subvolume for path = %s," - "(gfid = %s)", oldloc->path, gfid); - - op_errno = EINVAL; - goto err; - } - - dst_hashed = dht_subvol_get_hashed (this, newloc); - if (!dst_hashed) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_RENAME_FAILED, - "No hashed subvolume in layout for path=%s", - newloc->path); - op_errno = EINVAL; - goto err; - } - - if (newloc->inode) - dst_cached = dht_subvol_get_cached (this, newloc->inode); - - local = dht_local_init (frame, oldloc, NULL, 
GF_FOP_RENAME); - if (!local) { - op_errno = ENOMEM; - goto err; - } - /* cached_subvol will be set from dht_local_init, reset it to NULL, - as the logic of handling rename is different */ - local->cached_subvol = NULL; - - ret = loc_copy (&local->loc2, newloc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - - local->src_hashed = src_hashed; - local->src_cached = src_cached; - local->dst_hashed = dst_hashed; - local->dst_cached = dst_cached; - if (xdata) - local->xattr_req = dict_ref (xdata); - - if (newloc->inode) - gf_uuid_unparse(newloc->inode->gfid, newgfid); - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_RENAME_INFO, - "renaming %s (%s) (hash=%s/cache=%s) => %s (%s) " - "(hash=%s/cache=%s) ", - oldloc->path, gfid, src_hashed->name, src_cached->name, - newloc->path, newloc->inode ? newgfid : NULL, dst_hashed->name, - dst_cached ? dst_cached->name : ""); - - if (IA_ISDIR (oldloc->inode->ia_type)) { - dht_rename_dir (frame, this); - } else { - local->op_ret = 0; - ret = dht_rename_lock (frame); - if (ret < 0) { - op_errno = ENOMEM; - goto err; - } + return 0; + } + + gf_uuid_unparse(oldloc->inode->gfid, gfid); + + src_hashed = dht_subvol_get_hashed(this, oldloc); + if (!src_hashed) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "No hashed subvolume in layout for path=%s," + "(gfid = %s)", + oldloc->path, gfid); + op_errno = EINVAL; + goto err; + } + + src_cached = dht_subvol_get_cached(this, oldloc->inode); + if (!src_cached) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "No cached subvolume for path = %s," + "(gfid = %s)", + oldloc->path, gfid); + + op_errno = EINVAL; + goto err; + } + + dst_hashed = dht_subvol_get_hashed(this, newloc); + if (!dst_hashed) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED, + "No hashed subvolume in layout for path=%s", newloc->path); + op_errno = EINVAL; + goto err; + } + + if (newloc->inode) + dst_cached = dht_subvol_get_cached(this, newloc->inode); + + local = dht_local_init(frame, oldloc, NULL, GF_FOP_RENAME); + if (!local) { + op_errno = ENOMEM; + goto err; + } + /* cached_subvol will be set from dht_local_init, reset it to NULL, + as the logic of handling rename is different */ + local->cached_subvol = NULL; + + ret = loc_copy(&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + goto err; + } + + local->src_hashed = src_hashed; + local->src_cached = src_cached; + local->dst_hashed = dst_hashed; + local->dst_cached = dst_cached; + if (xdata) + local->xattr_req = dict_ref(xdata); + + if (newloc->inode) + gf_uuid_unparse(newloc->inode->gfid, newgfid); + + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_INFO, + "renaming %s (%s) (hash=%s/cache=%s) => %s (%s) " + "(hash=%s/cache=%s) ", + oldloc->path, gfid, src_hashed->name, src_cached->name, newloc->path, + newloc->inode ? newgfid : NULL, dst_hashed->name, + dst_cached ? dst_cached->name : ""); + + if (IA_ISDIR(oldloc->inode->ia_type)) { + dht_rename_dir(frame, this); + } else { + local->op_ret = 0; + ret = dht_rename_lock(frame); + if (ret < 0) { + op_errno = ENOMEM; + goto err; } + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 5d5c8e86ddf..e17f96698bd 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "glusterfs.h" #include "xlator.h" #include "dht-common.h" @@ -16,2736 +15,2640 @@ #include "dht-lock.h" #include "glusterfs-acl.h" -#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,path) do { \ - layout->list[i].start = srt; \ - layout->list[i].stop = srt + chunk - 1; \ - layout->list[i].commit_hash = layout->commit_hash; \ - \ - gf_msg_trace (this->name, 0, \ - "gave fix: %u - %u, with commit-hash %u" \ - " on %s for %s", \ - layout->list[i].start, \ - layout->list[i].stop, \ - layout->list[i].commit_hash, \ - layout->list[i].xlator->name, path); \ - } while (0) - -#define DHT_RESET_LAYOUT_RANGE(layout) do { \ - int cnt = 0; \ - for (cnt = 0; cnt < layout->cnt; cnt++ ) { \ - layout->list[cnt].start = 0; \ - layout->list[cnt].stop = 0; \ - } \ - } while (0) +#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \ + do { \ + layout->list[i].start = srt; \ + layout->list[i].stop = srt + chunk - 1; \ + layout->list[i].commit_hash = layout->commit_hash; \ + \ + gf_msg_trace(this->name, 0, \ + "gave fix: %u - %u, with commit-hash %u" \ + " on %s for %s", \ + layout->list[i].start, layout->list[i].stop, \ + layout->list[i].commit_hash, \ + layout->list[i].xlator->name, path); \ + } while (0) + +#define DHT_RESET_LAYOUT_RANGE(layout) \ + do { \ + int cnt = 0; \ + for (cnt = 0; cnt < layout->cnt; cnt++) { \ + layout->list[cnt].start = 0; \ + layout->list[cnt].stop = 0; \ + } \ + } while (0) int -dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout, - gf_boolean_t newdir, - dht_selfheal_layout_t healer, - dht_need_heal_t should_heal); +dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout, + gf_boolean_t newdir, dht_selfheal_layout_t healer, + dht_need_heal_t should_heal); static uint32_t -dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n) +dht_overlap_calc(dht_layout_t *old, int o, dht_layout_t *new, int n) { - if (o >= old->cnt || n >= new->cnt) - return 0; + if (o >= old->cnt || n >= new->cnt) + return 0; - if (old->list[o].err > 0 || new->list[n].err > 0) - return 0; + if (old->list[o].err > 0 || new->list[n].err > 0) + return 0; - if (old->list[o].start == old->list[o].stop) { - return 0; - } + if (old->list[o].start == old->list[o].stop) { + return 0; + } - if (new->list[n].start == new->list[n].stop) { - return 0; - } + if (new->list[n].start == new->list[n].stop) { + return 0; + } - if ((old->list[o].start > new->list[n].stop) || - (old->list[o].stop < new->list[n].start)) - return 0; + if ((old->list[o].start > new->list[n].stop) || + (old->list[o].stop < new->list[n].start)) + return 0; - return min (old->list[o].stop, new->list[n].stop) - - max (old->list[o].start, new->list[n].start) + 1; + return min(old->list[o].stop, new->list[n].stop) - + max(old->list[o].start, new->list[n].start) + 1; } int -dht_selfheal_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_selfheal_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - DHT_STACK_DESTROY (frame); - return 0; + 
DHT_STACK_DESTROY(frame); + return 0; } int -dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret, - int invoke_cbk) +dht_selfheal_dir_finish(call_frame_t *frame, xlator_t *this, int ret, + int invoke_cbk) { - dht_local_t *local = NULL, *lock_local = NULL; - call_frame_t *lock_frame = NULL; - int lock_count = 0; + dht_local_t *local = NULL, *lock_local = NULL; + call_frame_t *lock_frame = NULL; + int lock_count = 0; - local = frame->local; + local = frame->local; - /* Unlock entrylk */ - dht_unlock_entrylk_wrapper (frame, &local->lock[0].ns.directory_ns); + /* Unlock entrylk */ + dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns); - /* Unlock inodelk */ - lock_count = dht_lock_count (local->lock[0].ns.parent_layout.locks, - local->lock[0].ns.parent_layout.lk_count); - if (lock_count == 0) - goto done; + /* Unlock inodelk */ + lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks, + local->lock[0].ns.parent_layout.lk_count); + if (lock_count == 0) + goto done; - lock_frame = copy_frame (frame); - if (lock_frame == NULL) { - goto done; - } + lock_frame = copy_frame(frame); + if (lock_frame == NULL) { + goto done; + } - lock_local = dht_local_init (lock_frame, &local->loc, NULL, - lock_frame->root->op); - if (lock_local == NULL) { - goto done; - } + lock_local = dht_local_init(lock_frame, &local->loc, NULL, + lock_frame->root->op); + if (lock_local == NULL) { + goto done; + } - lock_local->lock[0].ns.parent_layout.locks = local->lock[0].ns.parent_layout.locks; - lock_local->lock[0].ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count; + lock_local->lock[0].ns.parent_layout.locks = local->lock[0] + .ns.parent_layout.locks; + lock_local->lock[0] + .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count; - local->lock[0].ns.parent_layout.locks = NULL; - local->lock[0].ns.parent_layout.lk_count = 0; + local->lock[0].ns.parent_layout.locks = NULL; + local->lock[0].ns.parent_layout.lk_count = 0; - dht_unlock_inodelk (lock_frame, - lock_local->lock[0].ns.parent_layout.locks, - lock_local->lock[0].ns.parent_layout.lk_count, - dht_selfheal_unlock_cbk); - lock_frame = NULL; + dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks, + lock_local->lock[0].ns.parent_layout.lk_count, + dht_selfheal_unlock_cbk); + lock_frame = NULL; done: - if (invoke_cbk) - local->selfheal.dir_cbk (frame, NULL, frame->this, ret, - local->op_errno, NULL); - if (lock_frame != NULL) { - DHT_STACK_DESTROY (lock_frame); - } + if (invoke_cbk) + local->selfheal.dir_cbk(frame, NULL, frame->this, ret, local->op_errno, + NULL); + if (lock_frame != NULL) { + DHT_STACK_DESTROY(lock_frame); + } - return 0; + return 0; } int -dht_refresh_layout_done (call_frame_t *frame) +dht_refresh_layout_done(call_frame_t *frame) { - int ret = -1; - dht_layout_t *refreshed = NULL, *heal = NULL; - dht_local_t *local = NULL; - dht_need_heal_t should_heal = NULL; - dht_selfheal_layout_t healer = NULL; + int ret = -1; + dht_layout_t *refreshed = NULL, *heal = NULL; + dht_local_t *local = NULL; + dht_need_heal_t should_heal = NULL; + dht_selfheal_layout_t healer = NULL; - local = frame->local; + local = frame->local; - refreshed = local->selfheal.refreshed_layout; - heal = local->selfheal.layout; + refreshed = local->selfheal.refreshed_layout; + heal = local->selfheal.layout; - healer = local->selfheal.healer; - should_heal = local->selfheal.should_heal; + healer = local->selfheal.healer; + should_heal = local->selfheal.should_heal; - ret = dht_layout_sort 
(refreshed); - if (ret == -1) { - gf_msg (frame->this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SORT_FAILED, - "sorting the layout failed"); - goto err; - } + ret = dht_layout_sort(refreshed); + if (ret == -1) { + gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED, + "sorting the layout failed"); + goto err; + } - if (should_heal (frame, &heal, &refreshed)) { - healer (frame, &local->loc, heal); - } else { - local->selfheal.layout = NULL; - local->selfheal.refreshed_layout = NULL; - local->selfheal.layout = refreshed; + if (should_heal(frame, &heal, &refreshed)) { + healer(frame, &local->loc, heal); + } else { + local->selfheal.layout = NULL; + local->selfheal.refreshed_layout = NULL; + local->selfheal.layout = refreshed; - dht_layout_unref (frame->this, heal); + dht_layout_unref(frame->this, heal); - dht_selfheal_dir_finish (frame, frame->this, 0, 1); - } + dht_selfheal_dir_finish(frame, frame->this, 0, 1); + } - return 0; + return 0; err: - dht_selfheal_dir_finish (frame, frame->this, -1, 1); - return 0; + dht_selfheal_dir_finish(frame, frame->this, -1, 1); + return 0; } int -dht_refresh_layout_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) +dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - dht_layout_t *layout = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0,}; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, err); - GF_VALIDATE_OR_GOTO ("dht", frame->local, err); - GF_VALIDATE_OR_GOTO ("dht", this->private, err); - - local = frame->local; - prev = cookie; - - layout = local->selfheal.refreshed_layout; - - LOCK (&frame->lock); - { - op_ret = dht_layout_merge (this, layout, prev, - op_ret, op_errno, xattr); - - dht_iatt_merge (this, &local->stbuf, stbuf); - - if (op_ret == -1) { - gf_uuid_unparse (local->loc.gfid, gfid); - local->op_errno = op_errno; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_FILE_LOOKUP_FAILED, - "lookup of %s on %s returned error, gfid: %s", - local->loc.path, prev->name, gfid); - - goto unlock; - } + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + dht_layout_t *layout = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; - local->op_ret = 0; + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO("dht", this, err); + GF_VALIDATE_OR_GOTO("dht", frame->local, err); + GF_VALIDATE_OR_GOTO("dht", this->private, err); + + local = frame->local; + prev = cookie; + + layout = local->selfheal.refreshed_layout; + + LOCK(&frame->lock); + { + op_ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr); + + dht_iatt_merge(this, &local->stbuf, stbuf); + + if (op_ret == -1) { + gf_uuid_unparse(local->loc.gfid, gfid); + local->op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_FILE_LOOKUP_FAILED, + "lookup of %s on %s returned error, gfid: %s", + local->loc.path, prev->name, gfid); + + goto unlock; } -unlock: - UNLOCK (&frame->lock); - this_call_cnt = dht_frame_return (frame); + local->op_ret = 0; + } +unlock: + UNLOCK(&frame->lock); - if (is_last_call (this_call_cnt)) { - if (local->op_ret == 0) { - local->refresh_layout_done (frame); - } else { - goto err; - } + this_call_cnt = dht_frame_return(frame); + if 
(is_last_call(this_call_cnt)) { + if (local->op_ret == 0) { + local->refresh_layout_done(frame); + } else { + goto err; } + } - return 0; + return 0; err: - if (local) { - local->refresh_layout_unlock (frame, this, -1, 1); - } - return 0; + if (local) { + local->refresh_layout_unlock(frame, this, -1, 1); + } + return 0; } int -dht_refresh_layout (call_frame_t *frame) +dht_refresh_layout(call_frame_t *frame) { - int call_cnt = 0; - int i = 0, ret = -1; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - xlator_t *this = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0,}; - - GF_VALIDATE_OR_GOTO ("dht", frame, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - - this = frame->this; - conf = this->private; - local = frame->local; - - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; - local->op_ret = -1; - - if (local->selfheal.refreshed_layout) { - dht_layout_unref (this, local->selfheal.refreshed_layout); - local->selfheal.refreshed_layout = NULL; - } - - local->selfheal.refreshed_layout = dht_layout_new (this, - conf->subvolume_cnt); - if (!local->selfheal.refreshed_layout) { - gf_uuid_unparse (local->loc.gfid, gfid); - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "mem allocation for layout failed, path:%s gfid:%s", - local->loc.path, gfid); - goto out; - } + int call_cnt = 0; + int i = 0, ret = -1; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + xlator_t *this = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", frame->local, out); + + this = frame->this; + conf = this->private; + local = frame->local; + + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + local->op_ret = -1; + + if (local->selfheal.refreshed_layout) { + dht_layout_unref(this, local->selfheal.refreshed_layout); + local->selfheal.refreshed_layout = NULL; + } + + local->selfheal.refreshed_layout = dht_layout_new(this, + conf->subvolume_cnt); + if (!local->selfheal.refreshed_layout) { + gf_uuid_unparse(local->loc.gfid, gfid); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "mem allocation for layout failed, path:%s gfid:%s", + local->loc.path, gfid); + goto out; + } - if (local->xattr != NULL) { - dict_del (local->xattr, conf->xattr_name); - } + if (local->xattr != NULL) { + dict_del(local->xattr, conf->xattr_name); + } + if (local->xattr_req == NULL) { + gf_uuid_unparse(local->loc.gfid, gfid); + local->xattr_req = dict_new(); if (local->xattr_req == NULL) { - gf_uuid_unparse (local->loc.gfid, gfid); - local->xattr_req = dict_new (); - if (local->xattr_req == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - "dict mem allocation failed, path:%s gfid:%s", - local->loc.path, gfid); - goto out; - } + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "dict mem allocation failed, path:%s gfid:%s", + local->loc.path, gfid); + goto out; } + } - if (dict_get (local->xattr_req, conf->xattr_name) == 0) { - ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, - 4 * 4); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:key = %s", - local->loc.path, conf->xattr_name); - } + if (dict_get(local->xattr_req, conf->xattr_name) == 0) { + ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value:key = %s", + local->loc.path, conf->xattr_name); + } - for (i = 0; i < 
call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_refresh_layout_cbk, - conf->subvolumes[i], conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_refresh_layout_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + local->xattr_req); + } - return 0; + return 0; out: - if (local) { - local->refresh_layout_unlock (frame, this, -1, 1); - } - return 0; + if (local) { + local->refresh_layout_unlock(frame, this, -1, 1); + } + return 0; } - int32_t -dht_selfheal_layout_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +dht_selfheal_layout_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (!local) { - goto err; - } + if (!local) { + goto err; + } - if (op_ret < 0) { - local->op_errno = op_errno; - goto err; - } + if (op_ret < 0) { + local->op_errno = op_errno; + goto err; + } - local->refresh_layout_unlock = dht_selfheal_dir_finish; - local->refresh_layout_done = dht_refresh_layout_done; + local->refresh_layout_unlock = dht_selfheal_dir_finish; + local->refresh_layout_done = dht_refresh_layout_done; - dht_refresh_layout (frame); - return 0; + dht_refresh_layout(frame); + return 0; err: - dht_selfheal_dir_finish (frame, this, -1, 1); - return 0; + dht_selfheal_dir_finish(frame, this, -1, 1); + return 0; } - gf_boolean_t -dht_should_heal_layout (call_frame_t *frame, dht_layout_t **heal, - dht_layout_t **ondisk) +dht_should_heal_layout(call_frame_t *frame, dht_layout_t **heal, + dht_layout_t **ondisk) { - gf_boolean_t fixit = _gf_true; - dht_local_t *local = NULL; - int ret = -1, heal_missing_dirs = 0; - - local = frame->local; - - if ((heal == NULL) || (*heal == NULL) || (ondisk == NULL) - || (*ondisk == NULL)) - goto out; - - ret = dht_layout_anomalies (frame->this, &local->loc, *ondisk, - &local->selfheal.hole_cnt, - &local->selfheal.overlaps_cnt, - &local->selfheal.missing_cnt, - &local->selfheal.down, - &local->selfheal.misc, NULL); - - if (ret < 0) - goto out; - - /* Directories might've been created as part of this self-heal. We've to - * sync non-layout xattrs and set range 0-0 on new directories + gf_boolean_t fixit = _gf_true; + dht_local_t *local = NULL; + int ret = -1, heal_missing_dirs = 0; + + local = frame->local; + + if ((heal == NULL) || (*heal == NULL) || (ondisk == NULL) || + (*ondisk == NULL)) + goto out; + + ret = dht_layout_anomalies( + frame->this, &local->loc, *ondisk, &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, &local->selfheal.missing_cnt, + &local->selfheal.down, &local->selfheal.misc, NULL); + + if (ret < 0) + goto out; + + /* Directories might've been created as part of this self-heal. We've to + * sync non-layout xattrs and set range 0-0 on new directories + */ + heal_missing_dirs = local->selfheal.force_mkdir + ? local->selfheal.force_mkdir + : dht_layout_missing_dirs(*heal); + + if ((local->selfheal.hole_cnt == 0) && + (local->selfheal.overlaps_cnt == 0) && heal_missing_dirs) { + dht_layout_t *tmp = NULL; + + /* Just added a brick and need to set 0-0 range on this brick. + * But ondisk layout is well-formed. So, swap layouts "heal" and + * "ondisk". Now "ondisk" layout will be used for healing + * xattrs. 
If there are any non-participating subvols in + * "ondisk" layout, dht_selfheal_dir_xattr_persubvol will set + * 0-0 and non-layout xattrs. This way we won't end up in + * "corrupting" already set and well-formed "ondisk" layout. */ - heal_missing_dirs = local->selfheal.force_mkdir - ? local->selfheal.force_mkdir : dht_layout_missing_dirs (*heal); - - if ((local->selfheal.hole_cnt == 0) - && (local->selfheal.overlaps_cnt == 0) && heal_missing_dirs) { - dht_layout_t *tmp = NULL; - - /* Just added a brick and need to set 0-0 range on this brick. - * But ondisk layout is well-formed. So, swap layouts "heal" and - * "ondisk". Now "ondisk" layout will be used for healing - * xattrs. If there are any non-participating subvols in - * "ondisk" layout, dht_selfheal_dir_xattr_persubvol will set - * 0-0 and non-layout xattrs. This way we won't end up in - * "corrupting" already set and well-formed "ondisk" layout. - */ - tmp = *heal; - *heal = *ondisk; - *ondisk = tmp; - - /* Current selfheal code, heals non-layout xattrs only after - * an add-brick. In fact non-layout xattrs are considered as - * secondary citizens which are healed only if layout xattrs - * need to be healed. This is wrong, since for eg., quota can be - * set when layout is well-formed, but a node is down. Also, - * just for healing non-layout xattrs, we don't need locking. - * This issue is _NOT FIXED_ by this patch. - */ - } + tmp = *heal; + *heal = *ondisk; + *ondisk = tmp; + + /* Current selfheal code, heals non-layout xattrs only after + * an add-brick. In fact non-layout xattrs are considered as + * secondary citizens which are healed only if layout xattrs + * need to be healed. This is wrong, since for eg., quota can be + * set when layout is well-formed, but a node is down. Also, + * just for healing non-layout xattrs, we don't need locking. + * This issue is _NOT FIXED_ by this patch. 
+ */ + } - fixit = (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt - || heal_missing_dirs); + fixit = (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt || + heal_missing_dirs); out: - return fixit; + return fixit; } int -dht_layout_span (dht_layout_t *layout) +dht_layout_span(dht_layout_t *layout) { - int i = 0, count = 0; + int i = 0, count = 0; - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].err) - continue; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err) + continue; - if (layout->list[i].start != layout->list[i].stop) - count++; - } + if (layout->list[i].start != layout->list[i].stop) + count++; + } - return count; + return count; } int -dht_decommissioned_bricks_in_layout (xlator_t *this, dht_layout_t *layout) +dht_decommissioned_bricks_in_layout(xlator_t *this, dht_layout_t *layout) { - dht_conf_t *conf = NULL; - int count = 0, i = 0, j = 0; + dht_conf_t *conf = NULL; + int count = 0, i = 0, j = 0; - if ((this == NULL) || (layout == NULL)) - goto out; + if ((this == NULL) || (layout == NULL)) + goto out; - conf = this->private; + conf = this->private; - for (i = 0; i < layout->cnt; i++) { - for (j = 0; j < conf->subvolume_cnt; j++) { - if (conf->decommissioned_bricks[j] && - conf->decommissioned_bricks[j] - == layout->list[i].xlator) { - count++; - } - } + for (i = 0; i < layout->cnt; i++) { + for (j = 0; j < conf->subvolume_cnt; j++) { + if (conf->decommissioned_bricks[j] && + conf->decommissioned_bricks[j] == layout->list[i].xlator) { + count++; + } } + } out: - return count; + return count; } dht_distribution_type_t -dht_distribution_type (xlator_t *this, dht_layout_t *layout) +dht_distribution_type(xlator_t *this, dht_layout_t *layout) { - dht_distribution_type_t type = GF_DHT_EQUAL_DISTRIBUTION; - int i = 0; - uint32_t start_range = 0, range = 0, diff = 0; + dht_distribution_type_t type = GF_DHT_EQUAL_DISTRIBUTION; + int i = 0; + uint32_t start_range = 0, range = 0, diff = 0; - if ((this == NULL) || (layout == NULL) || (layout->cnt < 1)) { - goto out; - } + if ((this == NULL) || (layout == NULL) || (layout->cnt < 1)) { + goto out; + } - for (i = 0; i < layout->cnt; i++) { - if (start_range == 0) { - start_range = layout->list[i].stop - - layout->list[i].start; - continue; - } + for (i = 0; i < layout->cnt; i++) { + if (start_range == 0) { + start_range = layout->list[i].stop - layout->list[i].start; + continue; + } - range = layout->list[i].stop - layout->list[i].start; - diff = (range >= start_range) - ? range - start_range - : start_range - range; + range = layout->list[i].stop - layout->list[i].start; + diff = (range >= start_range) ? 
range - start_range + : start_range - range; - if ((range != 0) && (diff > layout->cnt)) { - type = GF_DHT_WEIGHTED_DISTRIBUTION; - break; - } + if ((range != 0) && (diff > layout->cnt)) { + type = GF_DHT_WEIGHTED_DISTRIBUTION; + break; } + } out: - return type; + return type; } gf_boolean_t -dht_should_fix_layout (call_frame_t *frame, dht_layout_t **inmem, - dht_layout_t **ondisk) +dht_should_fix_layout(call_frame_t *frame, dht_layout_t **inmem, + dht_layout_t **ondisk) { - gf_boolean_t fixit = _gf_true; - - dht_local_t *local = NULL; - int layout_span = 0; - int decommissioned_bricks = 0; - int ret = 0; - dht_conf_t *conf = NULL; - dht_distribution_type_t inmem_dist_type = 0; - dht_distribution_type_t ondisk_dist_type = 0; - - conf = frame->this->private; - - local = frame->local; - - if ((inmem == NULL) || (*inmem == NULL) || (ondisk == NULL) - || (*ondisk == NULL)) - goto out; - - ret = dht_layout_anomalies (frame->this, &local->loc, *ondisk, - &local->selfheal.hole_cnt, - &local->selfheal.overlaps_cnt, NULL, - &local->selfheal.down, - &local->selfheal.misc, NULL); - if (ret < 0) { - fixit = _gf_false; - goto out; - } - - if (local->selfheal.down || local->selfheal.misc) { - fixit = _gf_false; - goto out; - } - - if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt) - goto out; - - /* If commit hashes are being updated, let it through */ - if ((*inmem)->commit_hash != (*ondisk)->commit_hash) - goto out; - - layout_span = dht_layout_span (*ondisk); - - decommissioned_bricks - = dht_decommissioned_bricks_in_layout (frame->this, - *ondisk); - inmem_dist_type = dht_distribution_type (frame->this, *inmem); - ondisk_dist_type = dht_distribution_type (frame->this, *ondisk); - - if ((decommissioned_bricks == 0) - && (layout_span == (conf->subvolume_cnt - - conf->decommission_subvols_cnt)) - && (inmem_dist_type == ondisk_dist_type)) - fixit = _gf_false; + gf_boolean_t fixit = _gf_true; + + dht_local_t *local = NULL; + int layout_span = 0; + int decommissioned_bricks = 0; + int ret = 0; + dht_conf_t *conf = NULL; + dht_distribution_type_t inmem_dist_type = 0; + dht_distribution_type_t ondisk_dist_type = 0; + + conf = frame->this->private; + + local = frame->local; + + if ((inmem == NULL) || (*inmem == NULL) || (ondisk == NULL) || + (*ondisk == NULL)) + goto out; + + ret = dht_layout_anomalies( + frame->this, &local->loc, *ondisk, &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, NULL, &local->selfheal.down, + &local->selfheal.misc, NULL); + if (ret < 0) { + fixit = _gf_false; + goto out; + } + + if (local->selfheal.down || local->selfheal.misc) { + fixit = _gf_false; + goto out; + } + + if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt) + goto out; + + /* If commit hashes are being updated, let it through */ + if ((*inmem)->commit_hash != (*ondisk)->commit_hash) + goto out; + + layout_span = dht_layout_span(*ondisk); + + decommissioned_bricks = dht_decommissioned_bricks_in_layout(frame->this, + *ondisk); + inmem_dist_type = dht_distribution_type(frame->this, *inmem); + ondisk_dist_type = dht_distribution_type(frame->this, *ondisk); + + if ((decommissioned_bricks == 0) && + (layout_span == + (conf->subvolume_cnt - conf->decommission_subvols_cnt)) && + (inmem_dist_type == ondisk_dist_type)) + fixit = _gf_false; out: - return fixit; + return fixit; } int -dht_selfheal_layout_lock (call_frame_t *frame, dht_layout_t *layout, - gf_boolean_t newdir, - dht_selfheal_layout_t healer, - dht_need_heal_t should_heal) +dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t 
*layout, + gf_boolean_t newdir, dht_selfheal_layout_t healer, + dht_need_heal_t should_heal) { - dht_local_t *local = NULL; - int count = 1, ret = -1, i = 0; - dht_lock_t **lk_array = NULL; - dht_conf_t *conf = NULL; - dht_layout_t *tmp = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err); - - local = frame->local; - - conf = frame->this->private; - - local->selfheal.healer = healer; - local->selfheal.should_heal = should_heal; - - tmp = local->selfheal.layout; - local->selfheal.layout = dht_layout_ref (frame->this, layout); - dht_layout_unref (frame->this, tmp); - - if (!newdir) { - count = conf->subvolume_cnt; - - lk_array = GF_CALLOC (count, sizeof (*lk_array), - gf_common_mt_char); - if (lk_array == NULL) { - gf_uuid_unparse (local->stbuf.ia_gfid, gfid); - gf_msg ("dht", GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "mem allocation failed for " - "lk_array, gfid:%s path: %s", gfid, - local->loc.path); - goto err; - } + dht_local_t *local = NULL; + int count = 1, ret = -1, i = 0; + dht_lock_t **lk_array = NULL; + dht_conf_t *conf = NULL; + dht_layout_t *tmp = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; - for (i = 0; i < count; i++) { - lk_array[i] = dht_lock_new (frame->this, - conf->subvolumes[i], - &local->loc, F_WRLCK, - DHT_LAYOUT_HEAL_DOMAIN, - NULL, - FAIL_ON_ANY_ERROR); - if (lk_array[i] == NULL) { - gf_uuid_unparse (local->stbuf.ia_gfid, gfid); - gf_msg (THIS->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "mem allocation " - "failed for lk_array, gfid:%s path:%s", - gfid, local->loc.path); - goto err; - } - } - } else { - count = 1; - lk_array = GF_CALLOC (count, sizeof (*lk_array), - gf_common_mt_char); - if (lk_array == NULL) { - gf_uuid_unparse (local->stbuf.ia_gfid, gfid); - gf_msg (THIS->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "mem allocation failed for " - "lk_array, gfid:%s path:%s", - gfid, local->loc.path); - goto err; - } + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err); - lk_array[0] = dht_lock_new (frame->this, local->hashed_subvol, - &local->loc, F_WRLCK, - DHT_LAYOUT_HEAL_DOMAIN, NULL, - FAIL_ON_ANY_ERROR); - if (lk_array[0] == NULL) { - gf_uuid_unparse (local->stbuf.ia_gfid, gfid); - gf_msg (THIS->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "mem allocation failed for " - "lk_array, gfid:%s path:%s", gfid, - local->loc.path); - goto err; - } - } + local = frame->local; - local->lock[0].layout.my_layout.locks = lk_array; - local->lock[0].layout.my_layout.lk_count = count; + conf = frame->this->private; - ret = dht_blocking_inodelk (frame, lk_array, count, - dht_selfheal_layout_lock_cbk); - if (ret < 0) { - local->lock[0].layout.my_layout.locks = NULL; - local->lock[0].layout.my_layout.lk_count = 0; - goto err; + local->selfheal.healer = healer; + local->selfheal.should_heal = should_heal; + + tmp = local->selfheal.layout; + local->selfheal.layout = dht_layout_ref(frame->this, layout); + dht_layout_unref(frame->this, tmp); + + if (!newdir) { + count = conf->subvolume_cnt; + + lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char); + if (lk_array == NULL) { + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "mem allocation failed for " + "lk_array, gfid:%s path: %s", + gfid, local->loc.path); + goto err; } - return 0; + for (i = 0; i < count; i++) { + lk_array[i] = dht_lock_new( + frame->this, conf->subvolumes[i], &local->loc, F_WRLCK, + 
DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR); + if (lk_array[i] == NULL) { + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "mem allocation " + "failed for lk_array, gfid:%s path:%s", + gfid, local->loc.path); + goto err; + } + } + } else { + count = 1; + lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char); + if (lk_array == NULL) { + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "mem allocation failed for " + "lk_array, gfid:%s path:%s", + gfid, local->loc.path); + goto err; + } + + lk_array[0] = dht_lock_new(frame->this, local->hashed_subvol, + &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN, + NULL, FAIL_ON_ANY_ERROR); + if (lk_array[0] == NULL) { + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "mem allocation failed for " + "lk_array, gfid:%s path:%s", + gfid, local->loc.path); + goto err; + } + } + + local->lock[0].layout.my_layout.locks = lk_array; + local->lock[0].layout.my_layout.lk_count = count; + + ret = dht_blocking_inodelk(frame, lk_array, count, + dht_selfheal_layout_lock_cbk); + if (ret < 0) { + local->lock[0].layout.my_layout.locks = NULL; + local->lock[0].layout.my_layout.lk_count = 0; + goto err; + } + + return 0; err: - if (lk_array != NULL) { - dht_lock_array_free (lk_array, count); - GF_FREE (lk_array); - } + if (lk_array != NULL) { + dht_lock_array_free(lk_array, count); + GF_FREE(lk_array); + } - return -1; + return -1; } int -dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - struct iatt *stbuf = NULL; - int i = 0; - int ret = 0; - dht_layout_t *layout = NULL; - int err = 0; - int this_call_cnt = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - layout = local->selfheal.layout; - subvol = cookie; - - if (op_ret == 0) { - err = 0; - } else { - gf_uuid_unparse (local->loc.gfid, gfid); - gf_msg (this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "layout setxattr failed on %s, path:%s gfid:%s", - subvol->name, local->loc.path, gfid); - err = op_errno; - } - - ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, (void **) &stbuf); - if (ret < 0) { - gf_uuid_unparse (local->loc.gfid, gfid); - gf_msg_debug (this->name, 0, "key = %s not present in dict" - ", path:%s gfid:%s", DHT_IATT_IN_XDATA_KEY, - local->loc.path, gfid); - } + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + struct iatt *stbuf = NULL; + int i = 0; + int ret = 0; + dht_layout_t *layout = NULL; + int err = 0; + int this_call_cnt = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + layout = local->selfheal.layout; + subvol = cookie; + + if (op_ret == 0) { + err = 0; + } else { + gf_uuid_unparse(local->loc.gfid, gfid); + gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "layout setxattr failed on %s, path:%s gfid:%s", subvol->name, + local->loc.path, gfid); + err = op_errno; + } + + ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); + if (ret < 0) { + gf_uuid_unparse(local->loc.gfid, gfid); + gf_msg_debug(this->name, 0, + "key = %s not present in dict" + ", path:%s gfid:%s", + DHT_IATT_IN_XDATA_KEY, local->loc.path, gfid); + } - for (i = 0; i < 
layout->cnt; i++) { - if (layout->list[i].xlator == subvol) { - layout->list[i].err = err; - break; - } + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) { + layout->list[i].err = err; + break; } + } - LOCK (&frame->lock); - { - dht_iatt_merge (this, &local->stbuf, stbuf); - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + dht_iatt_merge(this, &local->stbuf, stbuf); + } + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt)) { - dht_selfheal_dir_finish (frame, this, 0, 1); - } + if (is_last_call(this_call_cnt)) { + dht_selfheal_dir_finish(frame, this, 0, 1); + } - return 0; + return 0; } /* Code is required to set user xattr to local->xattr -*/ + */ int -dht_set_user_xattr (dict_t *dict, char *k, data_t *v, void *data) +dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data) { - dict_t *set_xattr = data; - int ret = -1; + dict_t *set_xattr = data; + int ret = -1; - ret = dict_set (set_xattr, k, v); - return ret; + ret = dict_set(set_xattr, k, v); + return ret; } - int -dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc, - dht_layout_t *layout, int i, - xlator_t *req_subvol) +dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc, + dht_layout_t *layout, int i, + xlator_t *req_subvol) { - xlator_t *subvol = NULL; - dict_t *xattr = NULL; - dict_t *xdata = NULL; - int ret = 0; - xlator_t *this = NULL; - int32_t *disk_layout = NULL; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - data_t *data = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - if (req_subvol) - subvol = req_subvol; - else - subvol = layout->list[i].xlator; - this = frame->this; - - GF_VALIDATE_OR_GOTO ("", this, err); - GF_VALIDATE_OR_GOTO (this->name, layout, err); - GF_VALIDATE_OR_GOTO (this->name, local, err); - GF_VALIDATE_OR_GOTO (this->name, subvol, err); - VALIDATE_OR_GOTO (this->private, err); - - conf = this->private; - - xattr = dict_new (); - if (!xattr) { - goto err; - } - - xdata = dict_new (); - if (!xdata) - goto err; - - ret = dict_set_str (xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: key = %s," - " gfid = %s", loc->path, - GLUSTERFS_INTERNAL_FOP_KEY, gfid); - goto err; - } - - ret = dict_set_int8 (xdata, DHT_IATT_IN_XDATA_KEY, 1); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: key = %s," - " gfid = %s", loc->path, - DHT_IATT_IN_XDATA_KEY, gfid); - goto err; - } - - gf_uuid_unparse(loc->inode->gfid, gfid); - - ret = dht_disk_layout_extract (this, layout, i, &disk_layout); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory self heal xattr failed:" - " %s: (subvol %s) Failed to extract disk layout," - " gfid = %s", loc->path, subvol->name, gfid); - goto err; - } - - ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory self heal xattr failed:" - "%s: (subvol %s) Failed to set xattr dictionary," - " gfid = %s", loc->path, subvol->name, gfid); - goto err; - } - disk_layout = NULL; - - gf_msg_trace (this->name, 0, - "setting hash range %u - %u (type %d) on subvolume %s" - " for %s", layout->list[i].start, layout->list[i].stop, - layout->type, subvol->name, 
loc->path); - - if (local->xattr) { - data = dict_get (local->xattr, QUOTA_LIMIT_KEY); - if (data) { - ret = dict_add (xattr, QUOTA_LIMIT_KEY, data); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = %s", - loc->path, QUOTA_LIMIT_KEY); - } - } - data = dict_get (local->xattr, QUOTA_LIMIT_OBJECTS_KEY); - if (data) { - ret = dict_add (xattr, QUOTA_LIMIT_OBJECTS_KEY, data); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = %s", - loc->path, QUOTA_LIMIT_OBJECTS_KEY); - } - } - } - - if (!gf_uuid_is_null (local->gfid)) - gf_uuid_copy (loc->gfid, local->gfid); - - STACK_WIND_COOKIE (frame, dht_selfheal_dir_xattr_cbk, - (void *) subvol, subvol, subvol->fops->setxattr, - loc, xattr, 0, xdata); - - dict_unref (xattr); - dict_unref (xdata); - - return 0; + xlator_t *subvol = NULL; + dict_t *xattr = NULL; + dict_t *xdata = NULL; + int ret = 0; + xlator_t *this = NULL; + int32_t *disk_layout = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + data_t *data = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + if (req_subvol) + subvol = req_subvol; + else + subvol = layout->list[i].xlator; + this = frame->this; + + GF_VALIDATE_OR_GOTO("", this, err); + GF_VALIDATE_OR_GOTO(this->name, layout, err); + GF_VALIDATE_OR_GOTO(this->name, local, err); + GF_VALIDATE_OR_GOTO(this->name, subvol, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + xattr = dict_new(); + if (!xattr) { + goto err; + } + + xdata = dict_new(); + if (!xdata) + goto err; + + ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value: key = %s," + " gfid = %s", + loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid); + goto err; + } + + ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value: key = %s," + " gfid = %s", + loc->path, DHT_IATT_IN_XDATA_KEY, gfid); + goto err; + } + + gf_uuid_unparse(loc->inode->gfid, gfid); + + ret = dht_disk_layout_extract(this, layout, i, &disk_layout); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Directory self heal xattr failed:" + " %s: (subvol %s) Failed to extract disk layout," + " gfid = %s", + loc->path, subvol->name, gfid); + goto err; + } + + ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Directory self heal xattr failed:" + "%s: (subvol %s) Failed to set xattr dictionary," + " gfid = %s", + loc->path, subvol->name, gfid); + goto err; + } + disk_layout = NULL; + + gf_msg_trace(this->name, 0, + "setting hash range %u - %u (type %d) on subvolume %s" + " for %s", + layout->list[i].start, layout->list[i].stop, layout->type, + subvol->name, loc->path); + + if (local->xattr) { + data = dict_get(local->xattr, QUOTA_LIMIT_KEY); + if (data) { + ret = dict_add(xattr, QUOTA_LIMIT_KEY, data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value:" + " key = %s", + loc->path, QUOTA_LIMIT_KEY); + } + } + data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY); + if (data) { + ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data); + if (ret) { + 
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value:" + " key = %s", + loc->path, QUOTA_LIMIT_OBJECTS_KEY); + } + } + } + + if (!gf_uuid_is_null(local->gfid)) + gf_uuid_copy(loc->gfid, local->gfid); + + STACK_WIND_COOKIE(frame, dht_selfheal_dir_xattr_cbk, (void *)subvol, subvol, + subvol->fops->setxattr, loc, xattr, 0, xdata); + + dict_unref(xattr); + dict_unref(xdata); + + return 0; err: - if (xattr) - dict_unref (xattr); - if (xdata) - dict_unref (xdata); + if (xattr) + dict_unref(xattr); + if (xdata) + dict_unref(xdata); - GF_FREE (disk_layout); + GF_FREE(disk_layout); - dht_selfheal_dir_xattr_cbk (frame, (void *) subvol, frame->this, - -1, ENOMEM, NULL); - return 0; + dht_selfheal_dir_xattr_cbk(frame, (void *)subvol, frame->this, -1, ENOMEM, + NULL); + return 0; } int -dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) +dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { - dht_local_t *local = NULL; - int i = 0; - int count = 0; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - dht_layout_t *dummy = NULL; - - local = frame->local; - this = frame->this; - conf = this->private; - - gf_msg_debug (this->name, 0, - "%s: Writing the new range for all subvolumes", - loc->path); - - local->call_cnt = count = conf->subvolume_cnt; - - if (gf_log_get_loglevel () >= GF_LOG_DEBUG) - dht_log_new_layout_for_dir_selfheal (this, loc, layout); - - for (i = 0; i < layout->cnt; i++) { - dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL); - - if (--count == 0) - goto out; - } - /* if we are here, subvolcount > layout_count. subvols-per-directory - * option might be set here. We need to clear out layout from the - * non-participating subvolumes, else it will result in overlaps */ - dummy = dht_layout_new (this, 1); - if (!dummy) - goto out; - dummy->commit_hash = layout->commit_hash; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (_gf_false == - dht_is_subvol_in_layout (layout, conf->subvolumes[i])) { - dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0, - conf->subvolumes[i]); - if (--count == 0) - break; - } - } - - dht_layout_unref (this, dummy); + dht_local_t *local = NULL; + int i = 0; + int count = 0; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dht_layout_t *dummy = NULL; + + local = frame->local; + this = frame->this; + conf = this->private; + + gf_msg_debug(this->name, 0, "%s: Writing the new range for all subvolumes", + loc->path); + + local->call_cnt = count = conf->subvolume_cnt; + + if (gf_log_get_loglevel() >= GF_LOG_DEBUG) + dht_log_new_layout_for_dir_selfheal(this, loc, layout); + + for (i = 0; i < layout->cnt; i++) { + dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL); + + if (--count == 0) + goto out; + } + /* if we are here, subvolcount > layout_count. subvols-per-directory + * option might be set here. 
We need to clear out layout from the + * non-participating subvolumes, else it will result in overlaps */ + dummy = dht_layout_new(this, 1); + if (!dummy) + goto out; + dummy->commit_hash = layout->commit_hash; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) { + dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0, + conf->subvolumes[i]); + if (--count == 0) + break; + } + } + + dht_layout_unref(this, dummy); out: - return 0; + return 0; } int -dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) +dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { - dht_local_t *local = NULL; - int missing_xattr = 0; - int i = 0; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - dht_layout_t *dummy = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0,}; - - local = frame->local; - this = frame->this; - conf = this->private; - - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].err != -1 || !layout->list[i].stop) { - /* err != -1 would mean xattr present on the directory - * or the directory is non existent. - * !layout->list[i].stop would mean layout absent - */ - - continue; - } - missing_xattr++; - } - /* Also account for subvolumes with no-layout. Used for zero'ing out - * the layouts and for setting quota key's if present */ - for (i = 0; i < conf->subvolume_cnt; i++) { - if (_gf_false == - dht_is_subvol_in_layout (layout, conf->subvolumes[i])) { - missing_xattr++; - } - } - gf_msg_trace (this->name, 0, - "%d subvolumes missing xattr for %s", - missing_xattr, loc->path); - - if (missing_xattr == 0) { - dht_selfheal_dir_finish (frame, this, 0, 1); - return 0; - } - - local->call_cnt = missing_xattr; - - if (gf_log_get_loglevel () >= GF_LOG_DEBUG) - dht_log_new_layout_for_dir_selfheal (this, loc, layout); - - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].err != -1 || !layout->list[i].stop) - continue; - - dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL); - - if (--missing_xattr == 0) - break; - } - dummy = dht_layout_new (this, 1); - if (!dummy) { - gf_uuid_unparse (loc->gfid, gfid); - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "failed to allocate dummy layout, path:%s gfid:%s", - loc->path, gfid); - goto out; - } - for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) { - if (_gf_false == - dht_is_subvol_in_layout (layout, conf->subvolumes[i])) { - dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0, - conf->subvolumes[i]); - missing_xattr--; - } - } - - dht_layout_unref (this, dummy); -out: + dht_local_t *local = NULL; + int missing_xattr = 0; + int i = 0; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dht_layout_t *dummy = NULL; + char gfid[GF_UUID_BUF_SIZE] = { + 0, + }; + + local = frame->local; + this = frame->this; + conf = this->private; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) { + /* err != -1 would mean xattr present on the directory + * or the directory is non existent. + * !layout->list[i].stop would mean layout absent + */ + + continue; + } + missing_xattr++; + } + /* Also account for subvolumes with no-layout. 
Used for zero'ing out + * the layouts and for setting quota key's if present */ + for (i = 0; i < conf->subvolume_cnt; i++) { + if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) { + missing_xattr++; + } + } + gf_msg_trace(this->name, 0, "%d subvolumes missing xattr for %s", + missing_xattr, loc->path); + + if (missing_xattr == 0) { + dht_selfheal_dir_finish(frame, this, 0, 1); return 0; + } + + local->call_cnt = missing_xattr; + + if (gf_log_get_loglevel() >= GF_LOG_DEBUG) + dht_log_new_layout_for_dir_selfheal(this, loc, layout); + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) + continue; + + dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL); + + if (--missing_xattr == 0) + break; + } + dummy = dht_layout_new(this, 1); + if (!dummy) { + gf_uuid_unparse(loc->gfid, gfid); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "failed to allocate dummy layout, path:%s gfid:%s", loc->path, + gfid); + goto out; + } + for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) { + if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) { + dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0, + conf->subvolumes[i]); + missing_xattr--; + } + } + + dht_layout_unref(this, dummy); +out: + return 0; } gf_boolean_t -dht_is_subvol_part_of_layout (dht_layout_t *layout, xlator_t *xlator) +dht_is_subvol_part_of_layout(dht_layout_t *layout, xlator_t *xlator) { - int i = 0; - gf_boolean_t ret = _gf_false; - - for (i = 0; i < layout->cnt; i++) { - if (!strcmp (layout->list[i].xlator->name, xlator->name)) { - ret = _gf_true; - break; + int i = 0; + gf_boolean_t ret = _gf_false; - } + for (i = 0; i < layout->cnt; i++) { + if (!strcmp(layout->list[i].xlator->name, xlator->name)) { + ret = _gf_true; + break; } + } - return ret; + return ret; } int -dht_layout_index_from_conf (dht_layout_t *layout, xlator_t *xlator) +dht_layout_index_from_conf(dht_layout_t *layout, xlator_t *xlator) { - int i = -1; - int j = 0; + int i = -1; + int j = 0; - for (j = 0; j < layout->cnt; j++) { - if (!strcmp (layout->list[j].xlator->name, xlator->name)) { - i = j; - break; - } + for (j = 0; j < layout->cnt; j++) { + if (!strcmp(layout->list[j].xlator->name, xlator->name)) { + i = j; + break; } + } - return i; + return i; } int -dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int this_call_cnt = 0, ret = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int this_call_cnt = 0, ret = -1; - local = frame->local; - layout = local->selfheal.layout; + local = frame->local; + layout = local->selfheal.layout; - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt)) { - if (!local->heal_layout) { - gf_msg_trace (this->name, 0, - "Skip heal layout for %s gfid = %s ", - local->loc.path, uuid_utoa(local->gfid)); + if (is_last_call(this_call_cnt)) { + if (!local->heal_layout) { + gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ", + local->loc.path, uuid_utoa(local->gfid)); - dht_selfheal_dir_finish (frame, this, 0, 1); - return 0; - } - ret = dht_selfheal_layout_lock (frame, layout, _gf_false, - 
dht_selfheal_dir_xattr, - dht_should_heal_layout); + dht_selfheal_dir_finish(frame, this, 0, 1); + return 0; + } + ret = dht_selfheal_layout_lock(frame, layout, _gf_false, + dht_selfheal_dir_xattr, + dht_should_heal_layout); - if (ret < 0) { - dht_selfheal_dir_finish (frame, this, -1, 1); - } + if (ret < 0) { + dht_selfheal_dir_finish(frame, this, -1, 1); } + } - return 0; + return 0; } - int -dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf, - int32_t valid, dht_layout_t *layout) +dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, + int32_t valid, dht_layout_t *layout) { - int missing_attr = 0; - int i = 0, ret = -1; - dht_local_t *local = NULL; - xlator_t *this = NULL; - int cnt = 0; - - local = frame->local; - this = frame->this; + int missing_attr = 0; + int i = 0, ret = -1; + dht_local_t *local = NULL; + xlator_t *this = NULL; + int cnt = 0; + + local = frame->local; + this = frame->this; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == -1) + missing_attr++; + } + + if (missing_attr == 0) { + if (!local->heal_layout) { + gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ", + loc->path, uuid_utoa(loc->gfid)); + dht_selfheal_dir_finish(frame, this, 0, 1); + return 0; + } + ret = dht_selfheal_layout_lock(frame, layout, _gf_false, + dht_selfheal_dir_xattr, + dht_should_heal_layout); - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].err == -1) - missing_attr++; + if (ret < 0) { + dht_selfheal_dir_finish(frame, this, -1, 1); } - if (missing_attr == 0) { - if (!local->heal_layout) { - gf_msg_trace (this->name, 0, - "Skip heal layout for %s gfid = %s ", - loc->path, uuid_utoa(loc->gfid)); - dht_selfheal_dir_finish (frame, this, 0, 1); - return 0; - } - ret = dht_selfheal_layout_lock (frame, layout, _gf_false, - dht_selfheal_dir_xattr, - dht_should_heal_layout); - - if (ret < 0) { - dht_selfheal_dir_finish (frame, this, -1, 1); - } - - return 0; - } + return 0; + } - local->call_cnt = missing_attr; - cnt = layout->cnt; + local->call_cnt = missing_attr; + cnt = layout->cnt; - for (i = 0; i < cnt; i++) { - if (layout->list[i].err == -1) { - gf_msg_trace (this->name, 0, - "%s: setattr on subvol %s, gfid = %s", - loc->path, layout->list[i].xlator->name, - uuid_utoa(loc->gfid)); + for (i = 0; i < cnt; i++) { + if (layout->list[i].err == -1) { + gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s", + loc->path, layout->list[i].xlator->name, + uuid_utoa(loc->gfid)); - STACK_WIND (frame, dht_selfheal_dir_setattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator->fops->setattr, - loc, stbuf, valid, NULL); - } + STACK_WIND( + frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator, + layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL); } + } - return 0; + return 0; } int -dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - xlator_t *prev = NULL; - xlator_t *subvol = NULL; - int i = 0, ret = -1; - int this_call_cnt = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - local = frame->local; - layout = local->selfheal.layout; - prev = cookie; - subvol = 
prev; - - if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) { - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].xlator == subvol) { - layout->list[i].err = -1; - break; - } - } + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + xlator_t *prev = NULL; + xlator_t *subvol = NULL; + int i = 0, ret = -1; + int this_call_cnt = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + layout = local->selfheal.layout; + prev = cookie; + subvol = prev; + + if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) { + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) { + layout->list[i].err = -1; + break; + } } + } - if (op_ret) { - gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG : - GF_LOG_WARNING), - op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, - "Directory selfheal failed: path = %s, gfid = %s", - local->loc.path, gfid ); - goto out; - } - dht_iatt_merge (this, &local->preparent, preparent); - dht_iatt_merge (this, &local->postparent, postparent); - ret = 0; + if (op_ret) { + gf_uuid_unparse(local->loc.gfid, gfid); + gf_msg(this->name, + ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING), op_errno, + DHT_MSG_DIR_SELFHEAL_FAILED, + "Directory selfheal failed: path = %s, gfid = %s", + local->loc.path, gfid); + goto out; + } + dht_iatt_merge(this, &local->preparent, preparent); + dht_iatt_merge(this, &local->postparent, postparent); + ret = 0; out: - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt)) { - dht_selfheal_dir_finish (frame, this, ret, 0); - dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout); - } + if (is_last_call(this_call_cnt)) { + dht_selfheal_dir_finish(frame, this, ret, 0); + dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffff, + layout); + } - return 0; + return 0; } void -dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict) +dht_selfheal_dir_mkdir_setacl(dict_t *xattr, dict_t *dict) { - data_t *acl_default = NULL; - data_t *acl_access = NULL; - xlator_t *this = NULL; - int ret = -1; - - GF_ASSERT (xattr); - GF_ASSERT (dict); - - this = THIS; - GF_ASSERT (this); - - acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR); - - if (!acl_default) { - gf_msg_debug (this->name, 0, - "ACL_DEFAULT xattr not present"); - goto cont; - } - ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", - POSIX_ACL_DEFAULT_XATTR); + data_t *acl_default = NULL; + data_t *acl_access = NULL; + xlator_t *this = NULL; + int ret = -1; + + GF_ASSERT(xattr); + GF_ASSERT(dict); + + this = THIS; + GF_ASSERT(this); + + acl_default = dict_get(xattr, POSIX_ACL_DEFAULT_XATTR); + + if (!acl_default) { + gf_msg_debug(this->name, 0, "ACL_DEFAULT xattr not present"); + goto cont; + } + ret = dict_set(dict, POSIX_ACL_DEFAULT_XATTR, acl_default); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value.key = %s", + POSIX_ACL_DEFAULT_XATTR); cont: - acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR); - if (!acl_access) { - gf_msg_debug (this->name, 0, - "ACL_ACCESS xattr not present"); - goto out; - } - ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", - 
POSIX_ACL_ACCESS_XATTR); + acl_access = dict_get(xattr, POSIX_ACL_ACCESS_XATTR); + if (!acl_access) { + gf_msg_debug(this->name, 0, "ACL_ACCESS xattr not present"); + goto out; + } + ret = dict_set(dict, POSIX_ACL_ACCESS_XATTR, acl_access); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value.key = %s", + POSIX_ACL_ACCESS_XATTR); out: - return; + return; } - void -dht_selfheal_dir_mkdir_setquota (dict_t *src, dict_t *dst) +dht_selfheal_dir_mkdir_setquota(dict_t *src, dict_t *dst) { - data_t *quota_limit_key = NULL; - data_t *quota_limit_obj_key = NULL; - xlator_t *this = NULL; - int ret = -1; - - GF_ASSERT (src); - GF_ASSERT (dst); - - this = THIS; - GF_ASSERT (this); - - quota_limit_key = dict_get (src, QUOTA_LIMIT_KEY); - if (!quota_limit_key) { - gf_msg_debug (this->name, 0, - "QUOTA_LIMIT_KEY xattr not present"); - goto cont; - } - ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", - QUOTA_LIMIT_KEY); + data_t *quota_limit_key = NULL; + data_t *quota_limit_obj_key = NULL; + xlator_t *this = NULL; + int ret = -1; + + GF_ASSERT(src); + GF_ASSERT(dst); + + this = THIS; + GF_ASSERT(this); + + quota_limit_key = dict_get(src, QUOTA_LIMIT_KEY); + if (!quota_limit_key) { + gf_msg_debug(this->name, 0, "QUOTA_LIMIT_KEY xattr not present"); + goto cont; + } + ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value.key = %s", QUOTA_LIMIT_KEY); cont: - quota_limit_obj_key = dict_get (src, QUOTA_LIMIT_OBJECTS_KEY); - if (!quota_limit_obj_key) { - gf_msg_debug (this->name, 0, - "QUOTA_LIMIT_OBJECTS_KEY xattr not present"); - goto out; - } - ret = dict_set (dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", - QUOTA_LIMIT_OBJECTS_KEY); + quota_limit_obj_key = dict_get(src, QUOTA_LIMIT_OBJECTS_KEY); + if (!quota_limit_obj_key) { + gf_msg_debug(this->name, 0, + "QUOTA_LIMIT_OBJECTS_KEY xattr not present"); + goto out; + } + ret = dict_set(dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value.key = %s", + QUOTA_LIMIT_OBJECTS_KEY); out: - return; + return; } - - - - int -dht_selfheal_dir_mkdir_lookup_done (call_frame_t *frame, xlator_t *this) +dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - int i = 0; - dict_t *dict = NULL; - dht_layout_t *layout = NULL; - loc_t *loc = NULL; - int cnt = 0; - int ret = -1; - - VALIDATE_OR_GOTO (this->private, err); - - local = frame->local; - layout = local->layout; - loc = &local->loc; - - if (!gf_uuid_is_null (local->gfid)) { - dict = dict_new (); - if (!dict) - return -1; - - ret = dict_set_gfuuid (dict, "gfid-req", local->gfid, true); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = gfid-req", loc->path); - } else if (local->params) { - /* Send the dictionary from higher layers directly */ - - dict = dict_ref (local->params); - } - /* Code to update all extended attributed from local->xattr - to dict - */ - dht_dir_set_heal_xattr (this, local, dict, local->xattr, NULL, - NULL); + dht_local_t *local = NULL; + int i = 0; 
+ dict_t *dict = NULL; + dht_layout_t *layout = NULL; + loc_t *loc = NULL; + int cnt = 0; + int ret = -1; + + VALIDATE_OR_GOTO(this->private, err); + + local = frame->local; + layout = local->layout; + loc = &local->loc; + + if (!gf_uuid_is_null(local->gfid)) { + dict = dict_new(); + if (!dict) + return -1; + + ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "%s: Failed to set dictionary value:" + " key = gfid-req", + loc->path); + } else if (local->params) { + /* Send the dictionary from higher layers directly */ + + dict = dict_ref(local->params); + } + /* Code to update all extended attributed from local->xattr + to dict + */ + dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL); + + if (!dict) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "dict is NULL, need to make sure gfids are same"); + dict = dict_new(); + if (!dict) + return -1; + } + ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value for" + " key = %s at path: %s", + GF_INTERNAL_CTX_KEY, loc->path); + /* We can still continue. As heal can still happen + * unless quota limits have reached for the dir. + */ + } - if (!dict) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "dict is NULL, need to make sure gfids are same"); - dict = dict_new (); - if (!dict) - return -1; - } - ret = dict_set_flag (dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value for" - " key = %s at path: %s", - GF_INTERNAL_CTX_KEY, loc->path); - /* We can still continue. As heal can still happen - * unless quota limits have reached for the dir. 
- */ - } + cnt = layout->cnt; + for (i = 0; i < cnt; i++) { + if (layout->list[i].err == ESTALE || layout->list[i].err == ENOENT || + local->selfheal.force_mkdir) { + gf_msg_debug(this->name, 0, "Creating directory %s on subvol %s", + loc->path, layout->list[i].xlator->name); - cnt = layout->cnt; - for (i = 0; i < cnt; i++) { - if (layout->list[i].err == ESTALE || - layout->list[i].err == ENOENT || - local->selfheal.force_mkdir) { - gf_msg_debug (this->name, 0, - "Creating directory %s on subvol %s", - loc->path, layout->list[i].xlator->name); - - STACK_WIND_COOKIE (frame, dht_selfheal_dir_mkdir_cbk, - layout->list[i].xlator, - layout->list[i].xlator, - layout->list[i].xlator->fops->mkdir, - loc, - st_mode_from_ia (local->stbuf.ia_prot, - local->stbuf.ia_type), - 0, dict); - } + STACK_WIND_COOKIE( + frame, dht_selfheal_dir_mkdir_cbk, layout->list[i].xlator, + layout->list[i].xlator, layout->list[i].xlator->fops->mkdir, + loc, + st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0, + dict); } + } - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return 0; + return 0; err: - dht_selfheal_dir_finish (frame, this, -1, 1); - return 0; + dht_selfheal_dir_finish(frame, this, -1, 1); + return 0; } int -dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - dict_t *xattr, struct iatt *postparent) +dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *postparent) { - dht_local_t *local = NULL; - int i = 0; - int this_call_cnt = 0; - int missing_dirs = 0; - dht_layout_t *layout = NULL; - dht_conf_t *conf = 0; - loc_t *loc = NULL; - int check_mds = 0; - int errst = 0; - int32_t mds_xattr_val[1] = {0}; - char gfid_local[GF_UUID_BUF_SIZE] = {0}; - - VALIDATE_OR_GOTO (this->private, err); - - local = frame->local; - layout = local->layout; - loc = &local->loc; - conf = this->private; - - if (local->gfid) - gf_uuid_unparse(local->gfid, gfid_local); - - this_call_cnt = dht_frame_return (frame); - - LOCK (&frame->lock); - { - if ((op_ret < 0) && - (op_errno == ENOENT || op_errno == ESTALE)) { - local->selfheal.hole_cnt = !local->selfheal.hole_cnt ? 1 - : local->selfheal.hole_cnt + 1; - } - - if (!op_ret) { - dht_iatt_merge (this, &local->stbuf, stbuf); - } - check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, - mds_xattr_val, 1, &errst); - if (dict_get (xattr, conf->mds_xattr_key) && check_mds && !errst) { - dict_unref (local->xattr); - local->xattr = dict_ref (xattr); - } + dht_local_t *local = NULL; + int i = 0; + int this_call_cnt = 0; + int missing_dirs = 0; + dht_layout_t *layout = NULL; + dht_conf_t *conf = 0; + loc_t *loc = NULL; + int check_mds = 0; + int errst = 0; + int32_t mds_xattr_val[1] = {0}; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + + VALIDATE_OR_GOTO(this->private, err); + + local = frame->local; + layout = local->layout; + loc = &local->loc; + conf = this->private; + + if (local->gfid) + gf_uuid_unparse(local->gfid, gfid_local); + + this_call_cnt = dht_frame_return(frame); + + LOCK(&frame->lock); + { + if ((op_ret < 0) && (op_errno == ENOENT || op_errno == ESTALE)) { + local->selfheal.hole_cnt = !local->selfheal.hole_cnt + ? 
1 + : local->selfheal.hole_cnt + 1; + } + + if (!op_ret) { + dht_iatt_merge(this, &local->stbuf, stbuf); + } + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); + if (dict_get(xattr, conf->mds_xattr_key) && check_mds && !errst) { + dict_unref(local->xattr); + local->xattr = dict_ref(xattr); + } + } + UNLOCK(&frame->lock); + + if (is_last_call(this_call_cnt)) { + if (local->selfheal.hole_cnt == layout->cnt) { + gf_msg_debug(this->name, op_errno, + "Lookup failed, an rmdir could have " + "deleted this entry %s", + loc->name); + local->op_errno = op_errno; + goto err; + } else { + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == ENOENT || + layout->list[i].err == ESTALE || + local->selfheal.force_mkdir) + missing_dirs++; + } + + if (missing_dirs == 0) { + dht_selfheal_dir_finish(frame, this, 0, 0); + dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, + layout); + return 0; + } + local->call_cnt = missing_dirs; + dht_selfheal_dir_mkdir_lookup_done(frame, this); } - UNLOCK (&frame->lock); - - if (is_last_call (this_call_cnt)) { - if (local->selfheal.hole_cnt == layout->cnt) { - gf_msg_debug (this->name, op_errno, - "Lookup failed, an rmdir could have " - "deleted this entry %s", loc->name); - local->op_errno = op_errno; - goto err; - } else { - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].err == ENOENT || - layout->list[i].err == ESTALE || - local->selfheal.force_mkdir) - missing_dirs++; - } - - if (missing_dirs == 0) { - dht_selfheal_dir_finish (frame, this, 0, 0); - dht_selfheal_dir_setattr (frame, loc, - &local->stbuf, - 0xffffffff, layout); - return 0; - } - - local->call_cnt = missing_dirs; - dht_selfheal_dir_mkdir_lookup_done (frame, this); - } - } + } - return 0; + return 0; err: - dht_selfheal_dir_finish (frame, this, -1, 1); - return 0; + dht_selfheal_dir_finish(frame, this, -1, 1); + return 0; } - int -dht_selfheal_dir_mkdir_lock_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int i = 0; - int ret = -1; - xlator_t *mds_subvol = NULL; - - VALIDATE_OR_GOTO (this->private, err); - - conf = this->private; - local = frame->local; - mds_subvol = local->mds_subvol; - - local->call_cnt = conf->subvolume_cnt; - - if (op_ret < 0) { - - /* We get this error when the directory entry was not created - * on a newky attached tier subvol. Hence proceed and do mkdir - * on the tier subvol. 
- */ - if (op_errno == EINVAL) { - local->call_cnt = 1; - dht_selfheal_dir_mkdir_lookup_done (frame, this); - return 0; - } + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; + int ret = -1; + xlator_t *mds_subvol = NULL; - gf_msg (this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_ENTRYLK_ERROR, - "acquiring entrylk after inodelk failed for %s", - local->loc.path); + VALIDATE_OR_GOTO(this->private, err); - local->op_errno = op_errno; - goto err; - } + conf = this->private; + local = frame->local; + mds_subvol = local->mds_subvol; - /* After getting locks, perform lookup again to ensure that the - directory was not deleted by a racing rmdir - */ - if (!local->xattr_req) - local->xattr_req = dict_new (); - - ret = dict_set_int32 (local->xattr_req, "list-xattr", 1); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary key list-xattr value " - " for path %s ", local->loc.path); + local->call_cnt = conf->subvolume_cnt; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (mds_subvol && conf->subvolumes[i] == mds_subvol) { - STACK_WIND_COOKIE (frame, - dht_selfheal_dir_mkdir_lookup_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } else { - STACK_WIND_COOKIE (frame, - dht_selfheal_dir_mkdir_lookup_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, NULL); - } + if (op_ret < 0) { + /* We get this error when the directory entry was not created + * on a newky attached tier subvol. Hence proceed and do mkdir + * on the tier subvol. + */ + if (op_errno == EINVAL) { + local->call_cnt = 1; + dht_selfheal_dir_mkdir_lookup_done(frame, this); + return 0; + } + + gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR, + "acquiring entrylk after inodelk failed for %s", + local->loc.path); + + local->op_errno = op_errno; + goto err; + } + + /* After getting locks, perform lookup again to ensure that the + directory was not deleted by a racing rmdir + */ + if (!local->xattr_req) + local->xattr_req = dict_new(); + + ret = dict_set_int32(local->xattr_req, "list-xattr", 1); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary key list-xattr value " + " for path %s ", + local->loc.path); + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (mds_subvol && conf->subvolumes[i] == mds_subvol) { + STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + local->xattr_req); + } else { + STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + NULL); } + } - return 0; + return 0; err: - dht_selfheal_dir_finish (frame, this, -1, 1); - return 0; + dht_selfheal_dir_finish(frame, this, -1, 1); + return 0; } int -dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc, - dht_layout_t *layout, int force) +dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + int force) { - int missing_dirs = 0; - int i = 0; - int ret = -1; - dht_local_t *local = NULL; - xlator_t *this = NULL; - - local = frame->local; - this = frame->this; - - local->selfheal.force_mkdir = force; - local->selfheal.hole_cnt = 0; - - for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].err == ENOENT || force) - missing_dirs++; - } - - if (missing_dirs == 0) { - if (!__is_root_gfid 
(local->stbuf.ia_gfid)) { - if (local->need_xattr_heal) { - local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal (this, local); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, - ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "xattr heal failed for " - "directory %s gfid %s ", - local->loc.path, - local->gfid); - } else { - if (!gf_uuid_is_null (local->gfid)) - gf_uuid_copy (loc->gfid, local->gfid); - - ret = dht_common_mark_mdsxattr (frame, NULL, 0); - if (!ret) - return 0; - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Failed to set mds xattr " - "for directory %s gfid %s ", - local->loc.path, local->gfid); - } - } - dht_selfheal_dir_setattr (frame, loc, &local->stbuf, - 0xffffffff, layout); - return 0; - } - - if (local->hashed_subvol == NULL) - local->hashed_subvol = dht_subvol_get_hashed (this, loc); - - if (local->hashed_subvol == NULL) { - local->op_errno = EINVAL; - gf_msg (this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "(%s/%s) (path: %s): " - "hashed subvolume not found", loc->pargfid, loc->name, - loc->path); - goto err; - } - - local->current = &local->lock[0]; - ret = dht_protect_namespace (frame, loc, local->hashed_subvol, - &local->current->ns, - dht_selfheal_dir_mkdir_lock_cbk); - - if (ret < 0) - goto err; - + int missing_dirs = 0; + int i = 0; + int ret = -1; + dht_local_t *local = NULL; + xlator_t *this = NULL; + + local = frame->local; + this = frame->this; + + local->selfheal.force_mkdir = force; + local->selfheal.hole_cnt = 0; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == ENOENT || force) + missing_dirs++; + } + + if (missing_dirs == 0) { + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + if (local->need_xattr_heal) { + local->need_xattr_heal = 0; + ret = dht_dir_xattr_heal(this, local); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "xattr heal failed for " + "directory %s gfid %s ", + local->loc.path, local->gfid); + } else { + if (!gf_uuid_is_null(local->gfid)) + gf_uuid_copy(loc->gfid, local->gfid); + + ret = dht_common_mark_mdsxattr(frame, NULL, 0); + if (!ret) + return 0; + + gf_msg(this->name, GF_LOG_INFO, 0, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "Failed to set mds xattr " + "for directory %s gfid %s ", + local->loc.path, local->gfid); + } + } + dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout); return 0; + } + + if (local->hashed_subvol == NULL) + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "(%s/%s) (path: %s): " + "hashed subvolume not found", + loc->pargfid, loc->name, loc->path); + goto err; + } + + local->current = &local->lock[0]; + ret = dht_protect_namespace(frame, loc, local->hashed_subvol, + &local->current->ns, + dht_selfheal_dir_mkdir_lock_cbk); + + if (ret < 0) + goto err; + + return 0; err: - return -1; + return -1; } int -dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc, - dht_layout_t *layout) +dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc, + dht_layout_t *layout) { - int start = 0; - uint32_t hashval = 0; - int ret = 0; - const char *str = NULL; - dht_conf_t *conf = NULL; - char buf[UUID_CANONICAL_FORM_LEN + 1] = {0, }; - - conf = this->private; - - if (conf->randomize_by_gfid) { - str = uuid_utoa_r (loc->gfid, buf); - } else { - str = loc->path; - } - - ret = dht_hash_compute (this, layout->type, str, &hashval); - 
if (ret == 0) { - start = (hashval % layout->cnt); - } - - return start; + int start = 0; + uint32_t hashval = 0; + int ret = 0; + const char *str = NULL; + dht_conf_t *conf = NULL; + char buf[UUID_CANONICAL_FORM_LEN + 1] = { + 0, + }; + + conf = this->private; + + if (conf->randomize_by_gfid) { + str = uuid_utoa_r(loc->gfid, buf); + } else { + str = loc->path; + } + + ret = dht_hash_compute(this, layout->type, str, &hashval); + if (ret == 0) { + start = (hashval % layout->cnt); + } + + return start; } static int -dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout) +dht_get_layout_count(xlator_t *this, dht_layout_t *layout, int new_layout) { - int i = 0; - int j = 0; - int err = 0; - int count = 0; - dht_conf_t *conf = NULL; - - /* Gets in use only for replace-brick, remove-brick */ - conf = this->private; + int i = 0; + int j = 0; + int err = 0; + int count = 0; + dht_conf_t *conf = NULL; + + /* Gets in use only for replace-brick, remove-brick */ + conf = this->private; + for (i = 0; i < layout->cnt; i++) { + for (j = 0; j < conf->subvolume_cnt; j++) { + if (conf->decommissioned_bricks[j] && + conf->decommissioned_bricks[j] == layout->list[i].xlator) { + layout->list[i].err = EINVAL; + break; + } + } + } + + for (i = 0; i < layout->cnt; i++) { + err = layout->list[i].err; + if (err == -1 || err == 0 || err == ENOENT) { + /* Take this with a pinch of salt. The behaviour seems + * to be slightly different when this function is + * invoked from mkdir codepath. For eg., err == 0 in + * mkdir codepath means directory created but xattr + * is not set yet. + */ + + /* Setting list[i].err = -1 is an indication for + dht_selfheal_layout_new_directory() to assign + a range. We set it to -1 based on any one of + the three criteria: + + - err == -1 already, which means directory + existed but layout was not set on it. + + - err == 0, which means directory exists and + has an old layout piece which will be + overwritten now. + + - err == ENOENT, which means directory does + not exist (possibly racing with mkdir or + finishing half done mkdir). The missing + directory will be attempted to be recreated. + */ + count++; + if (!err) + layout->list[i].err = -1; + } + } + + /* no subvolume has enough space, but can't stop directory creation */ + if (!count || !new_layout) { for (i = 0; i < layout->cnt; i++) { - for (j = 0; j < conf->subvolume_cnt; j++) { - if (conf->decommissioned_bricks[j] && - conf->decommissioned_bricks[j] == layout->list[i].xlator) { - layout->list[i].err = EINVAL; - break; - } - } - } - - for (i = 0; i < layout->cnt; i++) { - err = layout->list[i].err; - if (err == -1 || err == 0 || err == ENOENT) { - /* Take this with a pinch of salt. The behaviour seems - * to be slightly different when this function is - * invoked from mkdir codepath. For eg., err == 0 in - * mkdir codepath means directory created but xattr - * is not set yet. - */ - - /* Setting list[i].err = -1 is an indication for - dht_selfheal_layout_new_directory() to assign - a range. We set it to -1 based on any one of - the three criteria: - - - err == -1 already, which means directory - existed but layout was not set on it. - - - err == 0, which means directory exists and - has an old layout piece which will be - overwritten now. - - - err == ENOENT, which means directory does - not exist (possibly racing with mkdir or - finishing half done mkdir). The missing - directory will be attempted to be recreated. 
- */ - count++; - if (!err) - layout->list[i].err = -1; - } - } - - /* no subvolume has enough space, but can't stop directory creation */ - if (!count || !new_layout) { - for (i = 0; i < layout->cnt; i++) { - err = layout->list[i].err; - if (err == ENOSPC) { - layout->list[i].err = -1; - count++; - } - } - } - - /* if layout->spread_cnt is set, check if it is <= available - * subvolumes (down brick and decommissioned bricks are considered - * un-availbale). Else return count (available up bricks) */ - count = ((layout->spread_cnt && - (layout->spread_cnt <= count)) ? - layout->spread_cnt : ((count) ? count : 1)); - - return count; + err = layout->list[i].err; + if (err == ENOSPC) { + layout->list[i].err = -1; + count++; + } + } + } + + /* if layout->spread_cnt is set, check if it is <= available + * subvolumes (down brick and decommissioned bricks are considered + * un-availbale). Else return count (available up bricks) */ + count = ((layout->spread_cnt && (layout->spread_cnt <= count)) + ? layout->spread_cnt + : ((count) ? count : 1)); + + return count; } +void +dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc, + dht_layout_t *new_layout); -void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc, - dht_layout_t *new_layout); - -void dht_layout_entry_swap (dht_layout_t *layout, int i, int j); -void dht_layout_range_swap (dht_layout_t *layout, int i, int j); +void +dht_layout_entry_swap(dht_layout_t *layout, int i, int j); +void +dht_layout_range_swap(dht_layout_t *layout, int i, int j); /* * It's a bit icky using local variables in a macro, but it makes the rest * of the code a lot clearer. */ -#define OV_ENTRY(x,y) table[x*new->cnt+y] +#define OV_ENTRY(x, y) table[x * new->cnt + y] void -dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc, - dht_layout_t *new, dht_layout_t *old) +dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc, + dht_layout_t *new, dht_layout_t *old) { - int i = 0; - int j = 0; - uint32_t curr_overlap = 0; - uint32_t max_overlap = 0; - int max_overlap_idx = -1; - uint32_t overlap = 0; - uint32_t *table = NULL; - - dht_layout_sort_volname (old); - /* Now both old_layout->list[] and new_layout->list[] - are match the same xlators/subvolumes. i.e, - old_layout->[i] and new_layout->[i] are referring - to the same subvolumes - */ - - /* Build a table of overlaps between new[i] and old[j]. */ - table = alloca(sizeof(overlap)*old->cnt*new->cnt); - if (!table) { - return; - } - memset(table,0,sizeof(overlap)*old->cnt*new->cnt); - for (i = 0; i < new->cnt; ++i) { - for (j = 0; j < old->cnt; ++j) { - OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i); - } - } - - for (i = 0; i < new->cnt; i++) { - if (new->list[i].err > 0) { - /* Subvol might be marked for decommission - with EINVAL, or some other serious error - marked with positive errno. - */ - continue; - } - - max_overlap = 0; - max_overlap_idx = i; - for (j = (i + 1); j < new->cnt; ++j) { - if (new->list[j].err > 0) { - /* Subvol might be marked for decommission - with EINVAL, or some other serious error - marked with positive errno. - */ - continue; - } - /* Calculate the overlap now. */ - curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j); - /* Calculate the overlap after the proposed swap. */ - overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i); - /* Are we better than status quo? */ - if (overlap > curr_overlap) { - overlap -= curr_overlap; - /* Are we better than the previous choice? 
*/ - if (overlap > max_overlap) { - max_overlap = overlap; - max_overlap_idx = j; - } - } - } - - if (max_overlap_idx != i) { - dht_layout_range_swap (new, i, max_overlap_idx); - /* Need to swap the table values too. */ - for (j = 0; j < old->cnt; ++j) { - overlap = OV_ENTRY(i,j); - OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j); - OV_ENTRY(max_overlap_idx,j) = overlap; - } - } - } + int i = 0; + int j = 0; + uint32_t curr_overlap = 0; + uint32_t max_overlap = 0; + int max_overlap_idx = -1; + uint32_t overlap = 0; + uint32_t *table = NULL; + + dht_layout_sort_volname(old); + /* Now both old_layout->list[] and new_layout->list[] + are match the same xlators/subvolumes. i.e, + old_layout->[i] and new_layout->[i] are referring + to the same subvolumes + */ + + /* Build a table of overlaps between new[i] and old[j]. */ + table = alloca(sizeof(overlap) * old->cnt * new->cnt); + if (!table) { + return; + } + memset(table, 0, sizeof(overlap) * old->cnt * new->cnt); + for (i = 0; i < new->cnt; ++i) { + for (j = 0; j < old->cnt; ++j) { + OV_ENTRY(i, j) = dht_overlap_calc(old, j, new, i); + } + } + + for (i = 0; i < new->cnt; i++) { + if (new->list[i].err > 0) { + /* Subvol might be marked for decommission + with EINVAL, or some other serious error + marked with positive errno. + */ + continue; + } + + max_overlap = 0; + max_overlap_idx = i; + for (j = (i + 1); j < new->cnt; ++j) { + if (new->list[j].err > 0) { + /* Subvol might be marked for decommission + with EINVAL, or some other serious error + marked with positive errno. + */ + continue; + } + /* Calculate the overlap now. */ + curr_overlap = OV_ENTRY(i, i) + OV_ENTRY(j, j); + /* Calculate the overlap after the proposed swap. */ + overlap = OV_ENTRY(i, j) + OV_ENTRY(j, i); + /* Are we better than status quo? */ + if (overlap > curr_overlap) { + overlap -= curr_overlap; + /* Are we better than the previous choice? */ + if (overlap > max_overlap) { + max_overlap = overlap; + max_overlap_idx = j; + } + } + } + + if (max_overlap_idx != i) { + dht_layout_range_swap(new, i, max_overlap_idx); + /* Need to swap the table values too. */ + for (j = 0; j < old->cnt; ++j) { + overlap = OV_ENTRY(i, j); + OV_ENTRY(i, j) = OV_ENTRY(max_overlap_idx, j); + OV_ENTRY(max_overlap_idx, j) = overlap; + } + } + } } - dht_layout_t * -dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc, - dht_layout_t *layout) +dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) { - int i = 0; - xlator_t *this = NULL; - dht_layout_t *new_layout = NULL; - dht_conf_t *priv = NULL; - dht_local_t *local = NULL; - uint32_t subvol_down = 0; - int ret = 0; - gf_boolean_t maximize_overlap = _gf_true; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - this = frame->this; - priv = this->private; - local = frame->local; - - if (layout->type == DHT_HASH_TYPE_DM_USER) { - gf_msg_debug (THIS->name, 0, "leaving %s alone", - loc->path); - goto done; - } - - new_layout = dht_layout_new (this, priv->subvolume_cnt); - if (!new_layout) { - gf_uuid_unparse (loc->gfid, gfid); - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "mem allocation failed for new_layout, path:%s gfid:%s", - loc->path, gfid); - goto done; - } - - /* If a subvolume is down, do not re-write the layout. 
*/ - ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL, - &subvol_down, NULL, NULL); - - if (subvol_down || (ret == -1)) { - gf_uuid_unparse (loc->gfid, gfid); - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_FIX_FAILED, - "Layout fix failed: %u subvolume(s) are down" - ". Skipping fix layout. path:%s gfid:%s", subvol_down, - loc->path, gfid); - GF_FREE (new_layout); - return NULL; - } - - for (i = 0; i < new_layout->cnt; i++) { - if (layout->list[i].err != ENOSPC) - new_layout->list[i].err = layout->list[i].err; - else - new_layout->list[i].err = -1; - - new_layout->list[i].xlator = layout->list[i].xlator; - } - - new_layout->commit_hash = layout->commit_hash; - - if (priv->du_stats) { - for (i = 0; i < priv->subvolume_cnt; ++i) { - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_SUBVOL_INFO, - "subvolume %d (%s): %u chunks, path:%s", i, - priv->subvolumes[i]->name, - priv->du_stats[i].chunks, loc->path); - - /* Maximize overlap if the bricks are all the same - * size. - * This is probably not going to be very common on - * live setups but will benefit our regression tests - */ - if (i && (priv->du_stats[i].chunks - != priv->du_stats[0].chunks)) { - maximize_overlap = _gf_false; - } - } - } else { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_NO_DISK_USAGE_STATUS, "no du stats ?!?"); - } - - /* First give it a layout as though it is a new directory. This - ensures rotation to kick in */ - dht_layout_sort_volname (new_layout); - dht_selfheal_layout_new_directory (frame, loc, new_layout); - - - /* Maximize overlap if weighted-rebalance is disabled */ - if (!priv->do_weighting) - maximize_overlap = _gf_true; - - /* Now selectively re-assign ranges only when it helps */ - if (maximize_overlap) { - dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, - layout); - } + int i = 0; + xlator_t *this = NULL; + dht_layout_t *new_layout = NULL; + dht_conf_t *priv = NULL; + dht_local_t *local = NULL; + uint32_t subvol_down = 0; + int ret = 0; + gf_boolean_t maximize_overlap = _gf_true; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + this = frame->this; + priv = this->private; + local = frame->local; + + if (layout->type == DHT_HASH_TYPE_DM_USER) { + gf_msg_debug(THIS->name, 0, "leaving %s alone", loc->path); + goto done; + } + + new_layout = dht_layout_new(this, priv->subvolume_cnt); + if (!new_layout) { + gf_uuid_unparse(loc->gfid, gfid); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "mem allocation failed for new_layout, path:%s gfid:%s", + loc->path, gfid); + goto done; + } + + /* If a subvolume is down, do not re-write the layout. */ + ret = dht_layout_anomalies(this, loc, layout, NULL, NULL, NULL, + &subvol_down, NULL, NULL); + + if (subvol_down || (ret == -1)) { + gf_uuid_unparse(loc->gfid, gfid); + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED, + "Layout fix failed: %u subvolume(s) are down" + ". Skipping fix layout. 
path:%s gfid:%s", + subvol_down, loc->path, gfid); + GF_FREE(new_layout); + return NULL; + } + + for (i = 0; i < new_layout->cnt; i++) { + if (layout->list[i].err != ENOSPC) + new_layout->list[i].err = layout->list[i].err; + else + new_layout->list[i].err = -1; + + new_layout->list[i].xlator = layout->list[i].xlator; + } + + new_layout->commit_hash = layout->commit_hash; + + if (priv->du_stats) { + for (i = 0; i < priv->subvolume_cnt; ++i) { + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO, + "subvolume %d (%s): %u chunks, path:%s", i, + priv->subvolumes[i]->name, priv->du_stats[i].chunks, + loc->path); + + /* Maximize overlap if the bricks are all the same + * size. + * This is probably not going to be very common on + * live setups but will benefit our regression tests + */ + if (i && (priv->du_stats[i].chunks != priv->du_stats[0].chunks)) { + maximize_overlap = _gf_false; + } + } + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS, + "no du stats ?!?"); + } + + /* First give it a layout as though it is a new directory. This + ensures rotation to kick in */ + dht_layout_sort_volname(new_layout); + dht_selfheal_layout_new_directory(frame, loc, new_layout); + + /* Maximize overlap if weighted-rebalance is disabled */ + if (!priv->do_weighting) + maximize_overlap = _gf_true; + + /* Now selectively re-assign ranges only when it helps */ + if (maximize_overlap) { + dht_selfheal_layout_maximize_overlap(frame, loc, new_layout, layout); + } done: - if (new_layout) { - - /* Make sure the extra 'ref' for existing layout is removed */ - dht_layout_unref (this, local->layout); + if (new_layout) { + /* Make sure the extra 'ref' for existing layout is removed */ + dht_layout_unref(this, local->layout); - local->layout = new_layout; - } + local->layout = new_layout; + } - return local->layout; + return local->layout; } - /* * Having to call this 2x for each entry in the layout is pretty horrible, but * that's what all of this layout-sorting nonsense gets us. 
*/ uint32_t -dht_get_chunks_from_xl (xlator_t *parent, xlator_t *child) +dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child) { - dht_conf_t *priv = parent->private; - xlator_list_t *trav; - uint32_t index = 0; + dht_conf_t *priv = parent->private; + xlator_list_t *trav; + uint32_t index = 0; - if (!priv->du_stats) { - return 0; - } + if (!priv->du_stats) { + return 0; + } - for (trav = parent->children; trav; trav = trav->next) { - if (trav->xlator == child) { - return priv->du_stats[index].chunks; - } - ++index; + for (trav = parent->children; trav; trav = trav->next) { + if (trav->xlator == child) { + return priv->du_stats[index].chunks; } + ++index; + } - return 0; + return 0; } - void -dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc, - dht_layout_t *layout) +dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) { - xlator_t *this = NULL; - double chunk = 0; - int i = 0; - uint32_t start = 0; - int bricks_to_use = 0; - int err = 0; - int start_subvol = 0; - uint32_t curr_size; - uint32_t range_size; - uint64_t total_size = 0; - int real_i; - dht_conf_t *priv; - gf_boolean_t weight_by_size; - int bricks_used = 0; - - this = frame->this; - priv = this->private; - weight_by_size = priv->do_weighting; - - bricks_to_use = dht_get_layout_count (this, layout, 1); - GF_ASSERT (bricks_to_use > 0); - - bricks_used = 0; - for (i = 0; i < layout->cnt; ++i) { - err = layout->list[i].err; - if ((err != -1) && (err != ENOENT)) { - continue; - } - curr_size = dht_get_chunks_from_xl (this, - layout->list[i].xlator); - if (!curr_size) { - weight_by_size = _gf_false; - break; - } - total_size += curr_size; - if (++bricks_used >= bricks_to_use) { - break; - } - } - - if (weight_by_size && total_size) { - /* We know total_size is not zero. */ - chunk = ((double) 0xffffffff) / ((double) total_size); - gf_msg_debug (this->name, 0, - "chunk size = 0xffffffff / %lu = %f", - total_size, chunk); - } - else { - weight_by_size = _gf_false; - chunk = ((unsigned long) 0xffffffff) / bricks_to_use; + xlator_t *this = NULL; + double chunk = 0; + int i = 0; + uint32_t start = 0; + int bricks_to_use = 0; + int err = 0; + int start_subvol = 0; + uint32_t curr_size; + uint32_t range_size; + uint64_t total_size = 0; + int real_i; + dht_conf_t *priv; + gf_boolean_t weight_by_size; + int bricks_used = 0; + + this = frame->this; + priv = this->private; + weight_by_size = priv->do_weighting; + + bricks_to_use = dht_get_layout_count(this, layout, 1); + GF_ASSERT(bricks_to_use > 0); + + bricks_used = 0; + for (i = 0; i < layout->cnt; ++i) { + err = layout->list[i].err; + if ((err != -1) && (err != ENOENT)) { + continue; + } + curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator); + if (!curr_size) { + weight_by_size = _gf_false; + break; + } + total_size += curr_size; + if (++bricks_used >= bricks_to_use) { + break; + } + } + + if (weight_by_size && total_size) { + /* We know total_size is not zero. */ + chunk = ((double)0xffffffff) / ((double)total_size); + gf_msg_debug(this->name, 0, "chunk size = 0xffffffff / %lu = %f", + total_size, chunk); + } else { + weight_by_size = _gf_false; + chunk = ((unsigned long)0xffffffff) / bricks_to_use; + } + + start_subvol = dht_selfheal_layout_alloc_start(this, loc, layout); + + /* clear out the range, as we are re-computing here */ + DHT_RESET_LAYOUT_RANGE(layout); + + /* + * OK, what's this "real_i" stuff about? This used to be two loops - + * from start_subvol to layout->cnt-1, then from 0 to start_subvol-1. 
+ * That way is practically an open invitation to bugs when only one + * of the loops is updated. Using real_i and modulo operators to make + * it one loop avoids this problem. Remember, folks: it's everyone's + * responsibility to help stamp out copy/paste abuse. + */ + bricks_used = 0; + for (real_i = 0; real_i < layout->cnt; real_i++) { + i = (real_i + start_subvol) % layout->cnt; + err = layout->list[i].err; + if ((err != -1) && (err != ENOENT)) { + continue; + } + if (weight_by_size) { + curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator); + if (!curr_size) { + continue; + } + } else { + curr_size = 1; } - - start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout); - - /* clear out the range, as we are re-computing here */ - DHT_RESET_LAYOUT_RANGE (layout); - - /* - * OK, what's this "real_i" stuff about? This used to be two loops - - * from start_subvol to layout->cnt-1, then from 0 to start_subvol-1. - * That way is practically an open invitation to bugs when only one - * of the loops is updated. Using real_i and modulo operators to make - * it one loop avoids this problem. Remember, folks: it's everyone's - * responsibility to help stamp out copy/paste abuse. - */ - bricks_used = 0; - for (real_i = 0; real_i < layout->cnt; real_i++) { - i = (real_i + start_subvol) % layout->cnt; - err = layout->list[i].err; - if ((err != -1) && (err != ENOENT)) { - continue; - } - if (weight_by_size) { - curr_size = dht_get_chunks_from_xl (this, - layout->list[i].xlator); - if (!curr_size) { - continue; - } - } - else { - curr_size = 1; - } - range_size = chunk * curr_size; - gf_msg_debug (this->name, 0, - "assigning range size 0x%x to %s", - range_size, - layout->list[i].xlator->name); - DHT_SET_LAYOUT_RANGE(layout, i, start, range_size, - loc->path); - if (++bricks_used >= bricks_to_use) { - layout->list[i].stop = 0xffffffff; - goto done; - } - start += range_size; + range_size = chunk * curr_size; + gf_msg_debug(this->name, 0, "assigning range size 0x%x to %s", + range_size, layout->list[i].xlator->name); + DHT_SET_LAYOUT_RANGE(layout, i, start, range_size, loc->path); + if (++bricks_used >= bricks_to_use) { + layout->list[i].stop = 0xffffffff; + goto done; } + start += range_size; + } done: - return; + return; } int -dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc, - dht_layout_t *layout) +dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { - dht_local_t *local = NULL; - uint32_t holes = 0; - int ret = -1; - int i = -1; - uint32_t overlaps = 0; - - local = frame->local; - - holes = local->selfheal.hole_cnt; - overlaps = local->selfheal.overlaps_cnt; - - if (holes || overlaps) { - /* If the layout has anomalies which would change the hash - * ranges, then we need to reset the commit_hash for this - * directory, as the layout would change and things may not - * be in place as expected */ - layout->commit_hash = DHT_LAYOUT_HASH_INVALID; - dht_selfheal_layout_new_directory (frame, loc, layout); - ret = 0; - } + dht_local_t *local = NULL; + uint32_t holes = 0; + int ret = -1; + int i = -1; + uint32_t overlaps = 0; + + local = frame->local; + + holes = local->selfheal.hole_cnt; + overlaps = local->selfheal.overlaps_cnt; + + if (holes || overlaps) { + /* If the layout has anomalies which would change the hash + * ranges, then we need to reset the commit_hash for this + * directory, as the layout would change and things may not + * be in place as expected */ + layout->commit_hash = DHT_LAYOUT_HASH_INVALID; + 
dht_selfheal_layout_new_directory(frame, loc, layout); + ret = 0; + } - for (i = 0; i < layout->cnt; i++) { - /* directory not present */ - if (layout->list[i].err == ENOENT) { - ret = 0; - break; - } + for (i = 0; i < layout->cnt; i++) { + /* directory not present */ + if (layout->list[i].err == ENOENT) { + ret = 0; + break; } + } - /* TODO: give a fix to these non-virgins */ + /* TODO: give a fix to these non-virgins */ - return ret; + return ret; } int -dht_selfheal_new_directory (call_frame_t *frame, - dht_selfheal_dir_cbk_t dir_cbk, - dht_layout_t *layout) +dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + dht_layout_t *layout) { - dht_local_t *local = NULL; - int ret = 0; - inode_t *linked_inode = NULL, *inode = NULL; - loc_t *loc = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - char gfid[GF_UUID_BUF_SIZE] = {0}; - int32_t op_errno = EIO; - - local = frame->local; - - loc = &local->loc; - - gf_uuid_unparse(local->stbuf.ia_gfid, gfid); - gf_uuid_unparse(loc->parent->gfid, pgfid); - - linked_inode = inode_link (loc->inode, loc->parent, loc->name, - &local->stbuf); - if (!linked_inode) { - gf_msg (frame->this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_FAILED, - "linking inode failed (%s/%s) => %s", - pgfid, loc->name, gfid); - ret = -1; - goto out; - } - - inode = loc->inode; - loc->inode = linked_inode; - inode_unref (inode); - - local->selfheal.dir_cbk = dir_cbk; - local->selfheal.layout = dht_layout_ref (frame->this, layout); - - dht_layout_sort_volname (layout); - dht_selfheal_layout_new_directory (frame, &local->loc, layout); - - op_errno = ENOMEM; - ret = dht_selfheal_layout_lock (frame, layout, _gf_true, - dht_selfheal_dir_xattr, - dht_should_heal_layout); + dht_local_t *local = NULL; + int ret = 0; + inode_t *linked_inode = NULL, *inode = NULL; + loc_t *loc = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int32_t op_errno = EIO; + + local = frame->local; + + loc = &local->loc; + + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_uuid_unparse(loc->parent->gfid, pgfid); + + linked_inode = inode_link(loc->inode, loc->parent, loc->name, + &local->stbuf); + if (!linked_inode) { + gf_msg(frame->this->name, GF_LOG_WARNING, 0, + DHT_MSG_DIR_SELFHEAL_FAILED, + "linking inode failed (%s/%s) => %s", pgfid, loc->name, gfid); + ret = -1; + goto out; + } + + inode = loc->inode; + loc->inode = linked_inode; + inode_unref(inode); + + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(frame->this, layout); + + dht_layout_sort_volname(layout); + dht_selfheal_layout_new_directory(frame, &local->loc, layout); + + op_errno = ENOMEM; + ret = dht_selfheal_layout_lock(frame, layout, _gf_true, + dht_selfheal_dir_xattr, + dht_should_heal_layout); out: - if (ret < 0) { - dir_cbk (frame, NULL, frame->this, -1, op_errno, NULL); - } + if (ret < 0) { + dir_cbk(frame, NULL, frame->this, -1, op_errno, NULL); + } - return 0; + return 0; } int -dht_fix_directory_layout (call_frame_t *frame, - dht_selfheal_dir_cbk_t dir_cbk, - dht_layout_t *layout) +dht_fix_directory_layout(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + dht_layout_t *layout) { - dht_local_t *local = NULL; - dht_layout_t *tmp_layout = NULL; - int ret = 0; + dht_local_t *local = NULL; + dht_layout_t *tmp_layout = NULL; + int ret = 0; - local = frame->local; + local = frame->local; - local->selfheal.dir_cbk = dir_cbk; - local->selfheal.layout = dht_layout_ref (frame->this, layout); + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = 
dht_layout_ref(frame->this, layout); - /* No layout sorting required here */ - tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout); - if (!tmp_layout) { - return -1; - } + /* No layout sorting required here */ + tmp_layout = dht_fix_layout_of_directory(frame, &local->loc, layout); + if (!tmp_layout) { + return -1; + } - ret = dht_selfheal_layout_lock (frame, tmp_layout, _gf_false, - dht_fix_dir_xattr, - dht_should_fix_layout); + ret = dht_selfheal_layout_lock(frame, tmp_layout, _gf_false, + dht_fix_dir_xattr, dht_should_fix_layout); - return ret; + return ret; } - int -dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, - loc_t *loc, dht_layout_t *layout) +dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) { - dht_local_t *local = NULL; - uint32_t down = 0; - uint32_t misc = 0; - int ret = 0; - xlator_t *this = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - char gfid[GF_UUID_BUF_SIZE] = {0}; - inode_t *linked_inode = NULL, *inode = NULL; - - local = frame->local; - this = frame->this; - - local->selfheal.dir_cbk = dir_cbk; - local->selfheal.layout = dht_layout_ref (this, layout); - - if (!__is_root_gfid (local->stbuf.ia_gfid)) { - gf_uuid_unparse(local->stbuf.ia_gfid, gfid); - gf_uuid_unparse(loc->parent->gfid, pgfid); - - linked_inode = inode_link (loc->inode, loc->parent, loc->name, - &local->stbuf); - if (!linked_inode) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_FAILED, - "linking inode failed (%s/%s) => %s", - pgfid, loc->name, gfid); - ret = 0; - goto sorry_no_fix; - } + dht_local_t *local = NULL; + uint32_t down = 0; + uint32_t misc = 0; + int ret = 0; + xlator_t *this = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + char gfid[GF_UUID_BUF_SIZE] = {0}; + inode_t *linked_inode = NULL, *inode = NULL; + + local = frame->local; + this = frame->this; + + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(this, layout); + + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_uuid_unparse(loc->parent->gfid, pgfid); - inode = loc->inode; - loc->inode = linked_inode; - inode_unref (inode); + linked_inode = inode_link(loc->inode, loc->parent, loc->name, + &local->stbuf); + if (!linked_inode) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, + "linking inode failed (%s/%s) => %s", pgfid, loc->name, + gfid); + ret = 0; + goto sorry_no_fix; } - dht_layout_anomalies (this, loc, layout, - &local->selfheal.hole_cnt, - &local->selfheal.overlaps_cnt, - &local->selfheal.missing_cnt, - &local->selfheal.down, - &local->selfheal.misc, NULL); - - down = local->selfheal.down; - misc = local->selfheal.misc; - - if (down) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_FAILED, - "Directory selfheal failed: %d subvolumes down." - "Not fixing. path = %s, gfid = %s", - down, loc->path, gfid); - ret = 0; - goto sorry_no_fix; - } + inode = loc->inode; + loc->inode = linked_inode; + inode_unref(inode); + } + + dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, + &local->selfheal.missing_cnt, &local->selfheal.down, + &local->selfheal.misc, NULL); + + down = local->selfheal.down; + misc = local->selfheal.misc; + + if (down) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, + "Directory selfheal failed: %d subvolumes down." + "Not fixing. 
path = %s, gfid = %s", + down, loc->path, gfid); + ret = 0; + goto sorry_no_fix; + } - if (misc) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_FAILED, - "Directory selfheal failed : %d subvolumes " - "have unrecoverable errors. path = %s, gfid = %s", - misc, loc->path, gfid); + if (misc) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, + "Directory selfheal failed : %d subvolumes " + "have unrecoverable errors. path = %s, gfid = %s", + misc, loc->path, gfid); - ret = 0; - goto sorry_no_fix; - } + ret = 0; + goto sorry_no_fix; + } - dht_layout_sort_volname (layout); - local->heal_layout = _gf_true; + dht_layout_sort_volname(layout); + local->heal_layout = _gf_true; - /* Ignore return value as it can be inferred from result of - * dht_layout_anomalies - */ - dht_selfheal_dir_getafix (frame, loc, layout); + /* Ignore return value as it can be inferred from result of + * dht_layout_anomalies + */ + dht_selfheal_dir_getafix(frame, loc, layout); - if (!(local->selfheal.hole_cnt || local->selfheal.overlaps_cnt || - local->selfheal.missing_cnt)) { - local->heal_layout = _gf_false; - } + if (!(local->selfheal.hole_cnt || local->selfheal.overlaps_cnt || + local->selfheal.missing_cnt)) { + local->heal_layout = _gf_false; + } - ret = dht_selfheal_dir_mkdir (frame, loc, layout, 0); - if (ret < 0) { - ret = 0; - goto sorry_no_fix; - } + ret = dht_selfheal_dir_mkdir(frame, loc, layout, 0); + if (ret < 0) { + ret = 0; + goto sorry_no_fix; + } - return 0; + return 0; sorry_no_fix: - /* TODO: need to put appropriate local->op_errno */ - dht_selfheal_dir_finish (frame, this, ret, 1); + /* TODO: need to put appropriate local->op_errno */ + dht_selfheal_dir_finish(frame, this, ret, 1); - return 0; + return 0; } int -dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, - loc_t *loc, dht_layout_t *layout) +dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) { - int ret = 0; - dht_local_t *local = NULL; + int ret = 0; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - local->selfheal.dir_cbk = dir_cbk; - local->selfheal.layout = dht_layout_ref (frame->this, layout); + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(frame->this, layout); - ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1); + ret = dht_selfheal_dir_mkdir(frame, loc, layout, 1); - return ret; + return ret; } int -dht_dir_heal_xattrs (void *data) +dht_dir_heal_xattrs(void *data) { - call_frame_t *frame = NULL; - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - xlator_t *mds_subvol = NULL; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - dict_t *user_xattr = NULL; - dict_t *internal_xattr = NULL; - dict_t *mds_xattr = NULL; - dict_t *xdata = NULL; - int call_cnt = 0; - int ret = -1; - int uret = 0; - int uflag = 0; - int i = 0; - int xattr_hashed = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - int32_t allzero[1] = {0}; - - GF_VALIDATE_OR_GOTO ("dht", data, out); - - frame = data; - local = frame->local; - this = frame->this; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - mds_subvol = local->mds_subvol; - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); - gf_uuid_unparse(local->loc.gfid, gfid); - - if (!mds_subvol) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "No mds subvol for %s gfid = %s", - local->loc.path, gfid); - goto out; - } - - if ((local->loc.inode && 
gf_uuid_is_null (local->loc.inode->gfid)) || - gf_uuid_is_null (local->loc.gfid)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "No gfid present so skip heal for path %s gfid = %s", - local->loc.path, gfid); - goto out; - } - - internal_xattr = dict_new (); - if (!internal_xattr) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, - "dictionary creation failed"); - goto out; - } - xdata = dict_new (); - if (!xdata) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, - "dictionary creation failed"); - goto out; - } - - call_cnt = conf->subvolume_cnt; - - user_xattr = dict_new (); - if (!user_xattr) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, - "dictionary creation failed"); - goto out; - } - - ret = syncop_listxattr (local->mds_subvol, &local->loc, - &mds_xattr, NULL, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "failed to list xattrs for " - "%s: on %s ", - local->loc.path, local->mds_subvol->name); - } - - if (!mds_xattr) - goto out; - - dht_dir_set_heal_xattr (this, local, user_xattr, mds_xattr, - &uret, &uflag); - - /* To set quota related xattr need to set GLUSTERFS_INTERNAL_FOP_KEY - * key value to 1 - */ - if (dict_get (user_xattr, QUOTA_LIMIT_KEY) || - dict_get (user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) { - ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value: key = %s," - " path = %s", GLUSTERFS_INTERNAL_FOP_KEY, - local->loc.path); - goto out; - } - } - if (uret <= 0 && !uflag) - goto out; - - for (i = 0; i < call_cnt; i++) { - subvol = conf->subvolumes[i]; - if (subvol == mds_subvol) - continue; - if (uret || uflag) { - ret = syncop_setxattr (subvol, &local->loc, user_xattr, - 0, xdata, NULL); - if (ret) { - xattr_hashed = 1; - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Directory xattr heal failed. 
Failed to set" - "user xattr on path %s on " - "subvol %s, gfid = %s ", - local->loc.path, subvol->name, gfid); - } - } - } - /* After heal all custom xattr reset internal MDS xattr to 0 */ - if (!xattr_hashed) { - ret = dht_dict_set_array (internal_xattr, - conf->mds_xattr_key, - allzero, 1); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", conf->mds_xattr_key, - local->loc.path); - goto out; - } - ret = syncop_setxattr (mds_subvol, &local->loc, internal_xattr, - 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Failed to reset internal xattr " - "on path %s on subvol %s" - "gfid = %s ", local->loc.path, - mds_subvol->name, gfid); - } + call_frame_t *frame = NULL; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + xlator_t *mds_subvol = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dict_t *user_xattr = NULL; + dict_t *internal_xattr = NULL; + dict_t *mds_xattr = NULL; + dict_t *xdata = NULL; + int call_cnt = 0; + int ret = -1; + int uret = 0; + int uflag = 0; + int i = 0; + int xattr_hashed = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int32_t allzero[1] = {0}; + + GF_VALIDATE_OR_GOTO("dht", data, out); + + frame = data; + local = frame->local; + this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); + mds_subvol = local->mds_subvol; + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + gf_uuid_unparse(local->loc.gfid, gfid); + + if (!mds_subvol) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED, + "No mds subvol for %s gfid = %s", local->loc.path, gfid); + goto out; + } + + if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) || + gf_uuid_is_null(local->loc.gfid)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED, + "No gfid present so skip heal for path %s gfid = %s", + local->loc.path, gfid); + goto out; + } + + internal_xattr = dict_new(); + if (!internal_xattr) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, + "dictionary creation failed"); + goto out; + } + xdata = dict_new(); + if (!xdata) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, + "dictionary creation failed"); + goto out; + } + + call_cnt = conf->subvolume_cnt; + + user_xattr = dict_new(); + if (!user_xattr) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, + "dictionary creation failed"); + goto out; + } + + ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL, + NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, + "failed to list xattrs for " + "%s: on %s ", + local->loc.path, local->mds_subvol->name); + } + + if (!mds_xattr) + goto out; + + dht_dir_set_heal_xattr(this, local, user_xattr, mds_xattr, &uret, &uflag); + + /* To set quota related xattr need to set GLUSTERFS_INTERNAL_FOP_KEY + * key value to 1 + */ + if (dict_get(user_xattr, QUOTA_LIMIT_KEY) || + dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) { + ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s," + " path = %s", + GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path); + goto out; + } + } + if (uret <= 0 && !uflag) + goto out; + + for (i = 0; i < call_cnt; i++) { + subvol = conf->subvolumes[i]; + if (subvol == mds_subvol) + continue; + if (uret || 
uflag) { + ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata, + NULL); + if (ret) { + xattr_hashed = 1; + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "Directory xattr heal failed. Failed to set" + "user xattr on path %s on " + "subvol %s, gfid = %s ", + local->loc.path, subvol->name, gfid); + } + } + } + /* After heal all custom xattr reset internal MDS xattr to 0 */ + if (!xattr_hashed) { + ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero, + 1); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value:key = %s for " + "path %s", + conf->mds_xattr_key, local->loc.path); + goto out; + } + ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "Failed to reset internal xattr " + "on path %s on subvol %s" + "gfid = %s ", + local->loc.path, mds_subvol->name, gfid); } + } out: - if (user_xattr) - dict_unref (user_xattr); - if (mds_xattr) - dict_unref (mds_xattr); - if (internal_xattr) - dict_unref (internal_xattr); - if (xdata) - dict_unref (xdata); - return 0; + if (user_xattr) + dict_unref(user_xattr); + if (mds_xattr) + dict_unref(mds_xattr); + if (internal_xattr) + dict_unref(internal_xattr); + if (xdata) + dict_unref(xdata); + return 0; } - int -dht_dir_heal_xattrs_done (int ret, call_frame_t *sync_frame, void *data) +dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data) { - DHT_STACK_DESTROY (sync_frame); - return 0; + DHT_STACK_DESTROY(sync_frame); + return 0; } - int -dht_dir_attr_heal (void *data) +dht_dir_attr_heal(void *data) { - call_frame_t *frame = NULL; - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - xlator_t *mds_subvol = NULL; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - int call_cnt = 0; - int ret = -1; - int i = 0; - char gfid[GF_UUID_BUF_SIZE] = {0}; - - - GF_VALIDATE_OR_GOTO ("dht", data, out); - - frame = data; - local = frame->local; - mds_subvol = local->mds_subvol; - this = frame->this; - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", local, out); - conf = this->private; - GF_VALIDATE_OR_GOTO ("dht", conf, out); - - call_cnt = conf->subvolume_cnt; - - if (!__is_root_gfid (local->stbuf.ia_gfid) && (!mds_subvol)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_ATTR_HEAL_FAILED, - "No mds subvol for %s gfid = %s", - local->loc.path, gfid); - goto out; - } - - if (!__is_root_gfid (local->stbuf.ia_gfid)) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->subvolumes[i] == mds_subvol) { - if (!conf->subvolume_status[i]) { - gf_msg (this->name, GF_LOG_ERROR, - 0, DHT_MSG_HASHED_SUBVOL_DOWN, - "mds subvol is down for path " - " %s gfid is %s Unable to set xattr " , - local->loc.path, gfid); - goto out; - } - } - } + call_frame_t *frame = NULL; + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + xlator_t *mds_subvol = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + int call_cnt = 0; + int ret = -1; + int i = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + GF_VALIDATE_OR_GOTO("dht", data, out); + + frame = data; + local = frame->local; + mds_subvol = local->mds_subvol; + this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", local, out); + conf = this->private; + GF_VALIDATE_OR_GOTO("dht", conf, out); + + call_cnt = conf->subvolume_cnt; + + if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) { + 
gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_ATTR_HEAL_FAILED, + "No mds subvol for %s gfid = %s", local->loc.path, gfid); + goto out; + } + + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == mds_subvol) { + if (!conf->subvolume_status[i]) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_DOWN, + "mds subvol is down for path " + " %s gfid is %s Unable to set xattr ", + local->loc.path, gfid); + goto out; + } + } + } + } + + for (i = 0; i < call_cnt; i++) { + subvol = conf->subvolumes[i]; + if (!subvol || subvol == mds_subvol) + continue; + if (__is_root_gfid(local->stbuf.ia_gfid)) { + ret = syncop_setattr( + subvol, &local->loc, &local->stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, + NULL, NULL, NULL); + } else { + ret = syncop_setattr( + subvol, &local->loc, &local->mds_stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, + NULL, NULL, NULL); } - for (i = 0; i < call_cnt; i++) { - subvol = conf->subvolumes[i]; - if (!subvol || subvol == mds_subvol) - continue; - if (__is_root_gfid (local->stbuf.ia_gfid)) { - ret = syncop_setattr (subvol, &local->loc, &local->stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), - NULL, NULL, NULL, NULL); - } else { - ret = syncop_setattr (subvol, &local->loc, &local->mds_stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), - NULL, NULL, NULL, NULL); - } - - if (ret) { - gf_uuid_unparse(local->loc.gfid, gfid); + if (ret) { + gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_ATTR_HEAL_FAILED, - "Directory attr heal failed. Failed to set" - " uid/gid on path %s on subvol %s, gfid = %s ", - local->loc.path, subvol->name, gfid); - } + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_ATTR_HEAL_FAILED, + "Directory attr heal failed. 
Failed to set" + " uid/gid on path %s on subvol %s, gfid = %s ", + local->loc.path, subvol->name, gfid); } + } out: - return 0; + return 0; } int -dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data) +dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data) { - DHT_STACK_DESTROY (sync_frame); - return 0; + DHT_STACK_DESTROY(sync_frame); + return 0; } /* EXIT: dht_update_commit_hash_for_layout */ int -dht_update_commit_hash_for_layout_done (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) +dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - /* preserve oldest error */ - if (op_ret && !local->op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } + /* preserve oldest error */ + if (op_ret && !local->op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, NULL); + DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, NULL); - return 0; + return 0; } int -dht_update_commit_hash_for_layout_unlock (call_frame_t *frame, xlator_t *this) +dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this) { - dht_local_t *local = NULL; - int ret = 0; + dht_local_t *local = NULL; + int ret = 0; - local = frame->local; + local = frame->local; - ret = dht_unlock_inodelk (frame, local->lock[0].layout.my_layout.locks, - local->lock[0].layout.my_layout.lk_count, - dht_update_commit_hash_for_layout_done); - if (ret < 0) { - /* preserve oldest error, just ... */ - if (!local->op_ret) { - local->op_errno = errno; - local->op_ret = -1; - } + ret = dht_unlock_inodelk(frame, local->lock[0].layout.my_layout.locks, + local->lock[0].layout.my_layout.lk_count, + dht_update_commit_hash_for_layout_done); + if (ret < 0) { + /* preserve oldest error, just ... 
*/ + if (!local->op_ret) { + local->op_errno = errno; + local->op_ret = -1; + } - gf_msg (this->name, GF_LOG_WARNING, errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Winding unlock failed: stale locks left on brick" - " %s", local->loc.path); + gf_msg(this->name, GF_LOG_WARNING, errno, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Winding unlock failed: stale locks left on brick" + " %s", + local->loc.path); - dht_update_commit_hash_for_layout_done (frame, NULL, this, - 0, 0, NULL); - } + dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL); + } - return 0; + return 0; } int -dht_update_commit_hash_for_layout_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, - int op_errno, dict_t *xdata) +dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - /* store first failure, just because */ - if (op_ret && !local->op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + /* store first failure, just because */ + if (op_ret && !local->op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + UNLOCK(&frame->lock); - this_call_cnt = dht_frame_return (frame); + this_call_cnt = dht_frame_return(frame); - if (is_last_call (this_call_cnt)) { - dht_update_commit_hash_for_layout_unlock (frame, this); - } + if (is_last_call(this_call_cnt)) { + dht_update_commit_hash_for_layout_unlock(frame, this); + } - return 0; + return 0; } int -dht_update_commit_hash_for_layout_resume (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - dht_local_t *local = NULL; - int count = 1, ret = -1, i = 0, j = 0; - dht_conf_t *conf = NULL; - dht_layout_t *layout = NULL; - int32_t *disk_layout = NULL; - dict_t **xattr = NULL; - - local = frame->local; - conf = frame->this->private; - count = conf->local_subvols_cnt; - layout = local->layout; - - if (op_ret < 0) { - goto err_done; - } - - /* We precreate the xattr list as we cannot change call count post the - * first wind as we may never continue from there. So we finish prep - * work before winding the setxattrs */ - xattr = GF_CALLOC (count, sizeof (*xattr), gf_common_mt_char); - if (!xattr) { - local->op_errno = errno; + dht_local_t *local = NULL; + int count = 1, ret = -1, i = 0, j = 0; + dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + int32_t *disk_layout = NULL; + dict_t **xattr = NULL; + + local = frame->local; + conf = frame->this->private; + count = conf->local_subvols_cnt; + layout = local->layout; + + if (op_ret < 0) { + goto err_done; + } + + /* We precreate the xattr list as we cannot change call count post the + * first wind as we may never continue from there. 
So we finish prep + * work before winding the setxattrs */ + xattr = GF_CALLOC(count, sizeof(*xattr), gf_common_mt_char); + if (!xattr) { + local->op_errno = errno; + + gf_msg(this->name, GF_LOG_WARNING, errno, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Directory commit hash update failed:" + " %s: Allocation failed", + local->loc.path); + + goto err; + } + + for (i = 0; i < count; i++) { + /* find the layout index for the subvolume */ + ret = dht_layout_index_for_subvol(layout, conf->local_subvols[i]); + if (ret < 0) { + local->op_errno = ENOENT; - gf_msg (this->name, GF_LOG_WARNING, errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: Allocation failed", local->loc.path); + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Directory commit hash update failed:" + " %s: (subvol %s) Failed to find disk layout", + local->loc.path, conf->local_subvols[i]->name); - goto err; + goto err; } + j = ret; - for (i = 0; i < count; i++) { - /* find the layout index for the subvolume */ - ret = dht_layout_index_for_subvol (layout, - conf->local_subvols[i]); - if (ret < 0) { - local->op_errno = ENOENT; - - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: (subvol %s) Failed to find disk layout", - local->loc.path, conf->local_subvols[i]->name); - - goto err; - } - j = ret; - - /* update the commit hash for the layout */ - layout->list[j].commit_hash = layout->commit_hash; + /* update the commit hash for the layout */ + layout->list[j].commit_hash = layout->commit_hash; - /* extract the current layout */ - ret = dht_disk_layout_extract (this, layout, j, &disk_layout); - if (ret == -1) { - local->op_errno = errno; - - gf_msg (this->name, GF_LOG_WARNING, errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: (subvol %s) Failed to extract disk" - " layout", local->loc.path, - conf->local_subvols[i]->name); + /* extract the current layout */ + ret = dht_disk_layout_extract(this, layout, j, &disk_layout); + if (ret == -1) { + local->op_errno = errno; - goto err; - } + gf_msg(this->name, GF_LOG_WARNING, errno, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Directory commit hash update failed:" + " %s: (subvol %s) Failed to extract disk" + " layout", + local->loc.path, conf->local_subvols[i]->name); - xattr[i] = dict_new (); - if (!xattr[i]) { - local->op_errno = errno; + goto err; + } - gf_msg (this->name, GF_LOG_WARNING, errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: Allocation failed", local->loc.path); + xattr[i] = dict_new(); + if (!xattr[i]) { + local->op_errno = errno; - goto err; - } + gf_msg(this->name, GF_LOG_WARNING, errno, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Directory commit hash update failed:" + " %s: Allocation failed", + local->loc.path); - ret = dict_set_bin (xattr[i], conf->xattr_name, - disk_layout, 4 * 4); - if (ret != 0) { - local->op_errno = ENOMEM; + goto err; + } - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory self heal xattr failed:" - "%s: (subvol %s) Failed to set xattr" - " dictionary,", local->loc.path, - conf->local_subvols[i]->name); + ret = dict_set_bin(xattr[i], conf->xattr_name, disk_layout, 4 * 4); + if (ret != 0) { + local->op_errno = ENOMEM; - goto err; - } - disk_layout = NULL; + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + "Directory self heal xattr failed:" + "%s: (subvol %s) 
Failed to set xattr" + " dictionary,", + local->loc.path, conf->local_subvols[i]->name); - gf_msg_trace (this->name, 0, - "setting commit hash %u on subvolume %s" - " for %s", layout->list[j].commit_hash, - conf->local_subvols[i]->name, local->loc.path); + goto err; } + disk_layout = NULL; - /* wind the setting of the commit hash across the local subvols */ - local->call_cnt = count; - local->op_ret = 0; - local->op_errno = 0; + gf_msg_trace(this->name, 0, + "setting commit hash %u on subvolume %s" + " for %s", + layout->list[j].commit_hash, conf->local_subvols[i]->name, + local->loc.path); + } + + /* wind the setting of the commit hash across the local subvols */ + local->call_cnt = count; + local->op_ret = 0; + local->op_errno = 0; + for (i = 0; i < count; i++) { + STACK_WIND(frame, dht_update_commit_hash_for_layout_cbk, + conf->local_subvols[i], + conf->local_subvols[i]->fops->setxattr, &local->loc, + xattr[i], 0, NULL); + } + for (i = 0; i < count; i++) + dict_unref(xattr[i]); + GF_FREE(xattr); + + return 0; +err: + if (xattr) { for (i = 0; i < count; i++) { - STACK_WIND (frame, dht_update_commit_hash_for_layout_cbk, - conf->local_subvols[i], - conf->local_subvols[i]->fops->setxattr, - &local->loc, xattr[i], 0, NULL); - + if (xattr[i]) + dict_unref(xattr[i]); } - for (i = 0; i < count; i++) - dict_unref (xattr[i]); - GF_FREE (xattr); - - return 0; -err: - if (xattr) { - for (i = 0; i < count; i++) { - if (xattr[i]) - dict_unref (xattr[i]); - } - GF_FREE (xattr); - } + GF_FREE(xattr); + } - GF_FREE (disk_layout); + GF_FREE(disk_layout); - local->op_ret = -1; + local->op_ret = -1; - dht_update_commit_hash_for_layout_unlock (frame, this); + dht_update_commit_hash_for_layout_unlock(frame, this); - return 0; + return 0; err_done: - local->op_ret = -1; + local->op_ret = -1; - dht_update_commit_hash_for_layout_done (frame, NULL, this, 0, 0, NULL); + dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL); - return 0; + return 0; } /* ENTER: dht_update_commit_hash_for_layout (see EXIT above) @@ -2762,55 +2665,52 @@ err_done: * - Unlock and return. 
*/ int -dht_update_commit_hash_for_layout (call_frame_t *frame) +dht_update_commit_hash_for_layout(call_frame_t *frame) { - dht_local_t *local = NULL; - int count = 1, ret = -1, i = 0; - dht_lock_t **lk_array = NULL; - dht_conf_t *conf = NULL; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err); - - local = frame->local; - conf = frame->this->private; - - if (!conf->defrag) - goto err; - - count = conf->local_subvols_cnt; - lk_array = GF_CALLOC (count, sizeof (*lk_array), - gf_common_mt_char); - if (lk_array == NULL) - goto err; - - for (i = 0; i < count; i++) { - lk_array[i] = dht_lock_new (frame->this, - conf->local_subvols[i], - &local->loc, F_WRLCK, - DHT_LAYOUT_HEAL_DOMAIN, NULL, - FAIL_ON_ANY_ERROR); - if (lk_array[i] == NULL) - goto err; - } - - local->lock[0].layout.my_layout.locks = lk_array; - local->lock[0].layout.my_layout.lk_count = count; - - ret = dht_blocking_inodelk (frame, lk_array, count, - dht_update_commit_hash_for_layout_resume); - if (ret < 0) { - local->lock[0].layout.my_layout.locks = NULL; - local->lock[0].layout.my_layout.lk_count = 0; - goto err; - } - - return 0; + dht_local_t *local = NULL; + int count = 1, ret = -1, i = 0; + dht_lock_t **lk_array = NULL; + dht_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, err); + GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err); + + local = frame->local; + conf = frame->this->private; + + if (!conf->defrag) + goto err; + + count = conf->local_subvols_cnt; + lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char); + if (lk_array == NULL) + goto err; + + for (i = 0; i < count; i++) { + lk_array[i] = dht_lock_new(frame->this, conf->local_subvols[i], + &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN, + NULL, FAIL_ON_ANY_ERROR); + if (lk_array[i] == NULL) + goto err; + } + + local->lock[0].layout.my_layout.locks = lk_array; + local->lock[0].layout.my_layout.lk_count = count; + + ret = dht_blocking_inodelk(frame, lk_array, count, + dht_update_commit_hash_for_layout_resume); + if (ret < 0) { + local->lock[0].layout.my_layout.locks = NULL; + local->lock[0].layout.my_layout.lk_count = 0; + goto err; + } + + return 0; err: - if (lk_array != NULL) { - dht_lock_array_free (lk_array, count); - GF_FREE (lk_array); - } + if (lk_array != NULL) { + dht_lock_array_free(lk_array, count); + GF_FREE(lk_array); + } - return -1; + return -1; } diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index b5f790b111b..5fd97130d22 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -8,36 +8,32 @@ cases as published by the Free Software Foundation. */ - /* TODO: add NS locking */ #include "statedump.h" #include "dht-common.h" #include "dht-messages.h" #ifndef MAX -#define MAX(a, b) (((a) > (b))?(a):(b)) +#define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) #endif -#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) { \ - \ - pthread_mutex_lock (&conf->defrag->dfq_mutex); \ - \ - if (!strcasecmp (conf->dthrottle, "lazy")) \ - conf->defrag->recon_thread_count = 1; \ - \ - throttle_count = \ - MAX ((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \ - \ - if (!strcasecmp (conf->dthrottle, "normal")) \ - conf->defrag->recon_thread_count = \ - (throttle_count / 2); \ - \ - if (!strcasecmp (conf->dthrottle, "aggressive")) \ - conf->defrag->recon_thread_count = \ - throttle_count; \ - \ - pthread_mutex_unlock (&conf->defrag->dfq_mutex); \ - } \ +#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) \ + { \ + pthread_mutex_lock(&conf->defrag->dfq_mutex); \ + \ + if (!strcasecmp(conf->dthrottle, "lazy")) \ + conf->defrag->recon_thread_count = 1; \ + \ + throttle_count = MAX((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \ + \ + if (!strcasecmp(conf->dthrottle, "normal")) \ + conf->defrag->recon_thread_count = (throttle_count / 2); \ + \ + if (!strcasecmp(conf->dthrottle, "aggressive")) \ + conf->defrag->recon_thread_count = throttle_count; \ + \ + pthread_mutex_unlock(&conf->defrag->dfq_mutex); \ + } /* TODO: - use volumename in xattr instead of "dht" @@ -50,1189 +46,1158 @@ struct volume_options options[]; extern dht_methods_t dht_methods; void -dht_layout_dump (dht_layout_t *layout, const char *prefix) +dht_layout_dump(dht_layout_t *layout, const char *prefix) { - - char key[GF_DUMP_MAX_BUF_LEN]; - int i = 0; - - if (!layout) - goto out; - if (!prefix) - goto out; - - gf_proc_dump_build_key(key, prefix, "cnt"); - gf_proc_dump_write(key, "%d", layout->cnt); - gf_proc_dump_build_key(key, prefix, "preset"); - gf_proc_dump_write(key, "%d", layout->preset); - gf_proc_dump_build_key(key, prefix, "gen"); - gf_proc_dump_write(key, "%d", layout->gen); - if (layout->type != IA_INVAL) { - gf_proc_dump_build_key(key, prefix, "inode type"); - gf_proc_dump_write(key, "%d", layout->type); - } - - if (!IA_ISDIR (layout->type)) - goto out; - - for (i = 0; i < layout->cnt; i++) { - gf_proc_dump_build_key(key, prefix,"list[%d].err", i); - gf_proc_dump_write(key, "%d", layout->list[i].err); - gf_proc_dump_build_key(key, prefix,"list[%d].start", i); - gf_proc_dump_write(key, "%u", layout->list[i].start); - gf_proc_dump_build_key(key, prefix,"list[%d].stop", i); - gf_proc_dump_write(key, "%u", layout->list[i].stop); - if (layout->list[i].xlator) { - gf_proc_dump_build_key(key, prefix, - "list[%d].xlator.type", i); - gf_proc_dump_write(key, "%s", - layout->list[i].xlator->type); - gf_proc_dump_build_key(key, prefix, - "list[%d].xlator.name", i); - gf_proc_dump_write(key, "%s", - layout->list[i].xlator->name); - } + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; + + if (!layout) + goto out; + if (!prefix) + goto out; + + gf_proc_dump_build_key(key, prefix, "cnt"); + gf_proc_dump_write(key, "%d", layout->cnt); + gf_proc_dump_build_key(key, prefix, "preset"); + gf_proc_dump_write(key, "%d", layout->preset); + gf_proc_dump_build_key(key, prefix, "gen"); + gf_proc_dump_write(key, "%d", layout->gen); + if (layout->type != IA_INVAL) { + gf_proc_dump_build_key(key, prefix, "inode type"); + gf_proc_dump_write(key, "%d", layout->type); + } + + if (!IA_ISDIR(layout->type)) + goto out; + + for (i = 0; i < layout->cnt; i++) { + gf_proc_dump_build_key(key, prefix, "list[%d].err", i); + gf_proc_dump_write(key, "%d", layout->list[i].err); + gf_proc_dump_build_key(key, prefix, "list[%d].start", i); + gf_proc_dump_write(key, "%u", layout->list[i].start); + 
gf_proc_dump_build_key(key, prefix, "list[%d].stop", i); + gf_proc_dump_write(key, "%u", layout->list[i].stop); + if (layout->list[i].xlator) { + gf_proc_dump_build_key(key, prefix, "list[%d].xlator.type", i); + gf_proc_dump_write(key, "%s", layout->list[i].xlator->type); + gf_proc_dump_build_key(key, prefix, "list[%d].xlator.name", i); + gf_proc_dump_write(key, "%s", layout->list[i].xlator->name); } + } out: - return; + return; } - int32_t -dht_priv_dump (xlator_t *this) +dht_priv_dump(xlator_t *this) { - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; - int i = 0; - dht_conf_t *conf = NULL; - int ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; + dht_conf_t *conf = NULL; + int ret = -1; - if (!this) - goto out; + if (!this) + goto out; - conf = this->private; - if (!conf) - goto out; + conf = this->private; + if (!conf) + goto out; - ret = TRY_LOCK(&conf->subvolume_lock); - if (ret != 0) { - return ret; + ret = TRY_LOCK(&conf->subvolume_lock); + if (ret != 0) { + return ret; + } + + gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name); + gf_proc_dump_build_key(key_prefix, "xlator.cluster.dht", "%s.priv", + this->name); + gf_proc_dump_write("subvol_cnt", "%d", conf->subvolume_cnt); + for (i = 0; i < conf->subvolume_cnt; i++) { + snprintf(key, sizeof(key), "subvolumes[%d]", i); + gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type, + conf->subvolumes[i]->name); + if (conf->file_layouts && conf->file_layouts[i]) { + snprintf(key, sizeof(key), "file_layouts[%d]", i); + dht_layout_dump(conf->file_layouts[i], key); } - - gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name); - gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv", - this->name); - gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt); + if (conf->dir_layouts && conf->dir_layouts[i]) { + snprintf(key, sizeof(key), "dir_layouts[%d]", i); + dht_layout_dump(conf->dir_layouts[i], key); + } + if (conf->subvolume_status) { + snprintf(key, sizeof(key), "subvolume_status[%d]", i); + gf_proc_dump_write(key, "%d", (int)conf->subvolume_status[i]); + } + } + + gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed); + gf_proc_dump_write("gen", "%d", conf->gen); + gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk); + gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes); + gf_proc_dump_write("disk_unit", "%c", conf->disk_unit); + gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval); + gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); + gf_proc_dump_write("use-readdirp", "%d", conf->use_readdirp); + + if (conf->du_stats && conf->subvolume_status) { for (i = 0; i < conf->subvolume_cnt; i++) { - snprintf (key, sizeof (key), "subvolumes[%d]", i); - gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type, - conf->subvolumes[i]->name); - if (conf->file_layouts && conf->file_layouts[i]){ - snprintf (key, sizeof (key), "file_layouts[%d]", i); - dht_layout_dump(conf->file_layouts[i], key); - } - if (conf->dir_layouts && conf->dir_layouts[i]) { - snprintf (key, sizeof (key), "dir_layouts[%d]", i); - dht_layout_dump(conf->dir_layouts[i], key); - } - if (conf->subvolume_status) { - - snprintf (key, sizeof (key), "subvolume_status[%d]", i); - gf_proc_dump_write(key, "%d", - (int)conf->subvolume_status[i]); - } + if (!conf->subvolume_status[i]) + continue; - } + snprintf(key, sizeof(key), "subvolumes[%d]", i); + gf_proc_dump_write(key, "%s", 
conf->subvolumes[i]->name); + + snprintf(key, sizeof(key), "du_stats[%d].avail_percent", i); + gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_percent); + + snprintf(key, sizeof(key), "du_stats[%d].avail_space", i); + gf_proc_dump_write(key, "%lu", conf->du_stats[i].avail_space); - gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed); - gf_proc_dump_write("gen", "%d", conf->gen); - gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk); - gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes); - gf_proc_dump_write("disk_unit", "%c", conf->disk_unit); - gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval); - gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); - gf_proc_dump_write("use-readdirp", "%d", conf->use_readdirp); - - if (conf->du_stats && conf->subvolume_status) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->subvolume_status[i]) - continue; - - snprintf (key, sizeof (key), "subvolumes[%d]", i); - gf_proc_dump_write (key, "%s", - conf->subvolumes[i]->name); - - snprintf (key, sizeof (key), - "du_stats[%d].avail_percent", i); - gf_proc_dump_write (key, "%lf", - conf->du_stats[i].avail_percent); - - snprintf (key, sizeof (key), "du_stats[%d].avail_space", - i); - gf_proc_dump_write (key, "%lu", - conf->du_stats[i].avail_space); - - snprintf (key, sizeof (key), - "du_stats[%d].avail_inodes", i); - gf_proc_dump_write (key, "%lf", - conf->du_stats[i].avail_inodes); - - snprintf (key, sizeof (key), "du_stats[%d].log", i); - gf_proc_dump_write (key, "%lu", - conf->du_stats[i].log); - } + snprintf(key, sizeof(key), "du_stats[%d].avail_inodes", i); + gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_inodes); + + snprintf(key, sizeof(key), "du_stats[%d].log", i); + gf_proc_dump_write(key, "%lu", conf->du_stats[i].log); } + } - if (conf->last_stat_fetch.tv_sec) - gf_proc_dump_write("last_stat_fetch", "%s", - ctime(&conf->last_stat_fetch.tv_sec)); + if (conf->last_stat_fetch.tv_sec) + gf_proc_dump_write("last_stat_fetch", "%s", + ctime(&conf->last_stat_fetch.tv_sec)); - UNLOCK(&conf->subvolume_lock); + UNLOCK(&conf->subvolume_lock); out: - return ret; + return ret; } int32_t -dht_inodectx_dump (xlator_t *this, inode_t *inode) +dht_inodectx_dump(xlator_t *this, inode_t *inode) { - int ret = -1; - dht_layout_t *layout = NULL; + int ret = -1; + dht_layout_t *layout = NULL; - if (!this) - goto out; - if (!inode) - goto out; + if (!this) + goto out; + if (!inode) + goto out; - ret = dht_inode_ctx_layout_get (inode, this, &layout); + ret = dht_inode_ctx_layout_get(inode, this, &layout); - if ((ret != 0) || !layout) - return ret; + if ((ret != 0) || !layout) + return ret; - gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name); - dht_layout_dump(layout, "layout"); + gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name); + dht_layout_dump(layout, "layout"); out: - return ret; + return ret; } void -dht_fini (xlator_t *this) +dht_fini(xlator_t *this) { - int i = 0; - dht_conf_t *conf = NULL; - - GF_VALIDATE_OR_GOTO ("dht", this, out); - - conf = this->private; - this->private = NULL; - if (conf) { - if (conf->file_layouts) { - for (i = 0; i < conf->subvolume_cnt; i++) { - GF_FREE (conf->file_layouts[i]); - } - GF_FREE (conf->file_layouts); - } - - dict_unref(conf->leaf_to_subvol); - - /* allocated in dht_init_subvolumes() */ - GF_FREE (conf->subvolumes); - GF_FREE (conf->subvolume_status); - GF_FREE (conf->last_event); - GF_FREE (conf->subvol_up_time); - GF_FREE 
(conf->du_stats); - GF_FREE (conf->decommissioned_bricks); - - /* allocated in dht_init() */ - GF_FREE (conf->mds_xattr_key); - GF_FREE (conf->link_xattr_name); - GF_FREE (conf->commithash_xattr_name); - GF_FREE (conf->wild_xattr_name); - - /* allocated in dht_init_regex() */ - if (conf->rsync_regex_valid) - regfree (&conf->rsync_regex); - if (conf->extra_regex_valid) - regfree (&conf->extra_regex); - - synclock_destroy (&conf->link_lock); - - if (conf->lock_pool) - mem_pool_destroy (conf->lock_pool); - - GF_FREE (conf); + int i = 0; + dht_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO("dht", this, out); + + conf = this->private; + this->private = NULL; + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + GF_FREE(conf->file_layouts[i]); + } + GF_FREE(conf->file_layouts); } + + dict_unref(conf->leaf_to_subvol); + + /* allocated in dht_init_subvolumes() */ + GF_FREE(conf->subvolumes); + GF_FREE(conf->subvolume_status); + GF_FREE(conf->last_event); + GF_FREE(conf->subvol_up_time); + GF_FREE(conf->du_stats); + GF_FREE(conf->decommissioned_bricks); + + /* allocated in dht_init() */ + GF_FREE(conf->mds_xattr_key); + GF_FREE(conf->link_xattr_name); + GF_FREE(conf->commithash_xattr_name); + GF_FREE(conf->wild_xattr_name); + + /* allocated in dht_init_regex() */ + if (conf->rsync_regex_valid) + regfree(&conf->rsync_regex); + if (conf->extra_regex_valid) + regfree(&conf->extra_regex); + + synclock_destroy(&conf->link_lock); + + if (conf->lock_pool) + mem_pool_destroy(conf->lock_pool); + + GF_FREE(conf); + } out: - return; + return; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", this, out); - ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_dht_mt_end + 1); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_NO_MEMORY, - "Memory accounting init failed"); - return ret; - } -out: + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY, + "Memory accounting init failed"); return ret; + } +out: + return ret; } - int -dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf, - const char *bricks) +dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf, + const char *bricks) { - int i = 0; - int ret = -1; - char *tmpstr = NULL; - char *dup_brick = NULL; - char *node = NULL; - - if (!conf || !bricks) - goto out; - - dup_brick = gf_strdup (bricks); - node = strtok_r (dup_brick, ",", &tmpstr); - while (node) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!strcmp (conf->subvolumes[i]->name, node)) { - conf->decommissioned_bricks[i] = - conf->subvolumes[i]; - conf->decommission_subvols_cnt++; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_DECOMMISSION_INFO, - "decommissioning subvolume %s", - conf->subvolumes[i]->name); - break; - } - } - if (i == conf->subvolume_cnt) { - /* Wrong node given. 
*/ - goto out; - } - node = strtok_r (NULL, ",", &tmpstr); + int i = 0; + int ret = -1; + char *tmpstr = NULL; + char *dup_brick = NULL; + char *node = NULL; + + if (!conf || !bricks) + goto out; + + dup_brick = gf_strdup(bricks); + node = strtok_r(dup_brick, ",", &tmpstr); + while (node) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!strcmp(conf->subvolumes[i]->name, node)) { + conf->decommissioned_bricks[i] = conf->subvolumes[i]; + conf->decommission_subvols_cnt++; + gf_msg(this->name, GF_LOG_INFO, 0, + DHT_MSG_SUBVOL_DECOMMISSION_INFO, + "decommissioning subvolume %s", + conf->subvolumes[i]->name); + break; + } } + if (i == conf->subvolume_cnt) { + /* Wrong node given. */ + goto out; + } + node = strtok_r(NULL, ",", &tmpstr); + } - ret = 0; - conf->decommission_in_progress = 1; + ret = 0; + conf->decommission_in_progress = 1; out: - GF_FREE (dup_brick); + GF_FREE(dup_brick); - return ret; + return ret; } int -dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf) +dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf) { - int i = 0; - int ret = -1; + int i = 0; + int ret = -1; - if (!conf) - goto out; + if (!conf) + goto out; - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i]) { - conf->decommissioned_bricks[i] = NULL; - conf->decommission_subvols_cnt--; - } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i]) { + conf->decommissioned_bricks[i] = NULL; + conf->decommission_subvols_cnt--; } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } void -dht_init_regex (xlator_t *this, dict_t *odict, char *name, - regex_t *re, gf_boolean_t *re_valid, dht_conf_t *conf) +dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re, + gf_boolean_t *re_valid, dht_conf_t *conf) { - char *temp_str = NULL; + char *temp_str = NULL; - if (dict_get_str (odict, name, &temp_str) != 0) { - if (strcmp(name,"rsync-hash-regex")) { - return; - } - temp_str = "^\\.(.+)\\.[^.]+$"; + if (dict_get_str(odict, name, &temp_str) != 0) { + if (strcmp(name, "rsync-hash-regex")) { + return; + } + temp_str = "^\\.(.+)\\.[^.]+$"; + } + + LOCK(&conf->lock); + { + if (*re_valid) { + regfree(re); + *re_valid = _gf_false; + } + + if (!strcmp(temp_str, "none")) { + goto unlock; } - LOCK (&conf->lock); - { - if (*re_valid) { - regfree(re); - *re_valid = _gf_false; - } - - if (!strcmp(temp_str, "none")) { - goto unlock; - } - - if (regcomp(re, temp_str, REG_EXTENDED) == 0) { - gf_msg_debug (this->name, 0, - "using regex %s = %s", name, temp_str); - *re_valid = _gf_true; - } else { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_REGEX_INFO, - "compiling regex %s failed", temp_str); - } + if (regcomp(re, temp_str, REG_EXTENDED) == 0) { + gf_msg_debug(this->name, 0, "using regex %s = %s", name, temp_str); + *re_valid = _gf_true; + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REGEX_INFO, + "compiling regex %s failed", temp_str); } + } unlock: - UNLOCK (&conf->lock); + UNLOCK(&conf->lock); } int dht_set_subvol_range(xlator_t *this) { - int ret = -1; - dht_conf_t *conf = NULL; + int ret = -1; + dht_conf_t *conf = NULL; - conf = this->private; + conf = this->private; - if (!conf) - goto out; + if (!conf) + goto out; - conf->leaf_to_subvol = dict_new(); - if (!conf->leaf_to_subvol) - goto out; + conf->leaf_to_subvol = dict_new(); + if (!conf->leaf_to_subvol) + goto out; - ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol); + ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol); out: - return ret; + return 
ret; } int -dht_configure_throttle (xlator_t *this, dht_conf_t *conf, char *temp_str) +dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str) { - int rebal_thread_count = 0; - int ret = 0; - - pthread_mutex_lock (&conf->defrag->dfq_mutex); - { - if (!strcasecmp (temp_str, "lazy")) { - conf->defrag->recon_thread_count = 1; - } else if (!strcasecmp (temp_str, "normal")) { - conf->defrag->recon_thread_count = 2; - } else if (!strcasecmp (temp_str, "aggressive")) { - conf->defrag->recon_thread_count = MAX (MAX_REBAL_THREADS - 4, 4); - } else if ((gf_string2int (temp_str, &rebal_thread_count) == 0)) { - if ((rebal_thread_count > 0) && (rebal_thread_count <= MAX_REBAL_THREADS)) { - gf_msg (this->name, GF_LOG_INFO, 0, 0, - "rebal thread count configured to %d", - rebal_thread_count); - conf->defrag->recon_thread_count = rebal_thread_count; - } else { - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_OPTION, - "Invalid option: Reconfigure: " - "rebal-throttle should be " - "within range of 0 and maximum number of" - " cores available"); - ret = -1; - pthread_mutex_unlock (&conf->defrag->dfq_mutex); - goto out; - } + int rebal_thread_count = 0; + int ret = 0; + + pthread_mutex_lock(&conf->defrag->dfq_mutex); + { + if (!strcasecmp(temp_str, "lazy")) { + conf->defrag->recon_thread_count = 1; + } else if (!strcasecmp(temp_str, "normal")) { + conf->defrag->recon_thread_count = 2; + } else if (!strcasecmp(temp_str, "aggressive")) { + conf->defrag->recon_thread_count = MAX(MAX_REBAL_THREADS - 4, 4); + } else if ((gf_string2int(temp_str, &rebal_thread_count) == 0)) { + if ((rebal_thread_count > 0) && + (rebal_thread_count <= MAX_REBAL_THREADS)) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "rebal thread count configured to %d", + rebal_thread_count); + conf->defrag->recon_thread_count = rebal_thread_count; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION, + "Invalid option: Reconfigure: " + "rebal-throttle should be " + "within range of 0 and maximum number of" + " cores available"); + ret = -1; + pthread_mutex_unlock(&conf->defrag->dfq_mutex); + goto out; + } } else { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_OPTION, - "Invalid option: Reconfigure: " - "rebal-throttle should be {lazy|normal|aggressive}" - " or a number up to the number of cores available," - " not (%s), defaulting to (%d)", - temp_str, conf->dthrottle); - ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION, + "Invalid option: Reconfigure: " + "rebal-throttle should be {lazy|normal|aggressive}" + " or a number up to the number of cores available," + " not (%s), defaulting to (%d)", + temp_str, conf->dthrottle); + ret = -1; } - } - pthread_mutex_unlock (&conf->defrag->dfq_mutex); + } + pthread_mutex_unlock(&conf->defrag->dfq_mutex); out: - return ret; + return ret; } int -dht_reconfigure (xlator_t *this, dict_t *options) +dht_reconfigure(xlator_t *this, dict_t *options) { - dht_conf_t *conf = NULL; - char *temp_str = NULL; - gf_boolean_t search_unhashed; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", options, out); - - conf = this->private; - if (!conf) - return 0; - - if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) { - /* If option is not "auto", other options _should_ be boolean*/ - if (strcasecmp (temp_str, "auto")) { - if (!gf_string2boolean (temp_str, &search_unhashed)) { - gf_msg_debug(this->name, 0, "Reconfigure: " - "lookup-unhashed reconfigured(%s)", - temp_str); - conf->search_unhashed = 
search_unhashed; - } else { - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_OPTION, - "Invalid option: Reconfigure: " - "lookup-unhashed should be boolean," - " not (%s), defaulting to (%d)", - temp_str, conf->search_unhashed); - ret = -1; - goto out; - } - } else { - gf_msg_debug(this->name, 0, "Reconfigure:" - " lookup-unhashed reconfigured auto "); - conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO; - } - } + dht_conf_t *conf = NULL; + char *temp_str = NULL; + gf_boolean_t search_unhashed; + int ret = -1; - GF_OPTION_RECONF ("lookup-optimize", conf->lookup_optimize, options, - bool, out); + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", options, out); - GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, - percent_or_size, out); - /* option can be any one of percent or bytes */ - conf->disk_unit = 0; - if (conf->min_free_disk < 100.0) - conf->disk_unit = 'p'; + conf = this->private; + if (!conf) + return 0; - GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options, - percent, out); + if (dict_get_str(options, "lookup-unhashed", &temp_str) == 0) { + /* If option is not "auto", other options _should_ be boolean*/ + if (strcasecmp(temp_str, "auto")) { + if (!gf_string2boolean(temp_str, &search_unhashed)) { + gf_msg_debug(this->name, 0, + "Reconfigure: " + "lookup-unhashed reconfigured(%s)", + temp_str); + conf->search_unhashed = search_unhashed; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION, + "Invalid option: Reconfigure: " + "lookup-unhashed should be boolean," + " not (%s), defaulting to (%d)", + temp_str, conf->search_unhashed); + ret = -1; + goto out; + } + } else { + gf_msg_debug(this->name, 0, + "Reconfigure:" + " lookup-unhashed reconfigured auto "); + conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO; + } + } - GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, - options, uint32, out); + GF_OPTION_RECONF("lookup-optimize", conf->lookup_optimize, options, bool, + out); - GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options, - bool, out); - GF_OPTION_RECONF ("randomize-hash-range-by-gfid", - conf->randomize_by_gfid, - options, bool, out); + GF_OPTION_RECONF("min-free-disk", conf->min_free_disk, options, + percent_or_size, out); + /* option can be any one of percent or bytes */ + conf->disk_unit = 0; + if (conf->min_free_disk < 100.0) + conf->disk_unit = 'p'; - GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled, - options, bool, out); + GF_OPTION_RECONF("min-free-inodes", conf->min_free_inodes, options, percent, + out); - GF_OPTION_RECONF ("force-migration", conf->force_migration, - options, bool, out); + GF_OPTION_RECONF("directory-layout-spread", conf->dir_spread_cnt, options, + uint32, out); + GF_OPTION_RECONF("readdir-optimize", conf->readdir_optimize, options, bool, + out); + GF_OPTION_RECONF("randomize-hash-range-by-gfid", conf->randomize_by_gfid, + options, bool, out); - if (conf->defrag) { - if (dict_get_str (options, "rebal-throttle", &temp_str) == 0) { - ret = dht_configure_throttle (this, conf, temp_str); - if (ret == -1) - goto out; - } - } + GF_OPTION_RECONF("lock-migration", conf->lock_migration_enabled, options, + bool, out); - if (conf->defrag) { - conf->defrag->lock_migration_enabled = - conf->lock_migration_enabled; - } + GF_OPTION_RECONF("force-migration", conf->force_migration, options, bool, + out); - if (conf->defrag) { - GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats, - options, bool, out); + if (conf->defrag) { + if 
(dict_get_str(options, "rebal-throttle", &temp_str) == 0) { + ret = dht_configure_throttle(this, conf, temp_str); + if (ret == -1) + goto out; } + } + + if (conf->defrag) { + conf->defrag->lock_migration_enabled = conf->lock_migration_enabled; + } + + if (conf->defrag) { + GF_OPTION_RECONF("rebalance-stats", conf->defrag->stats, options, bool, + out); + } + + if (dict_get_str(options, "decommissioned-bricks", &temp_str) == 0) { + ret = dht_parse_decommissioned_bricks(this, conf, temp_str); + if (ret == -1) + goto out; + } else { + ret = dht_decommissioned_remove(this, conf); + if (ret == -1) + goto out; + } + + dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex, + &conf->rsync_regex_valid, conf); + dht_init_regex(this, options, "extra-hash-regex", &conf->extra_regex, + &conf->extra_regex_valid, conf); + + GF_OPTION_RECONF("weighted-rebalance", conf->do_weighting, options, bool, + out); + + GF_OPTION_RECONF("use-readdirp", conf->use_readdirp, options, bool, out); + ret = 0; +out: + return ret; +} - if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) { - ret = dht_parse_decommissioned_bricks (this, conf, temp_str); - if (ret == -1) - goto out; - } else { - ret = dht_decommissioned_remove (this, conf); - if (ret == -1) - goto out; +static int +gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag, + char *data) +{ + int ret = -1; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *dup_str = NULL; + char *num = NULL; + char *pattern_str = NULL; + char *pattern = NULL; + gf_defrag_pattern_list_t *temp_list = NULL; + gf_defrag_pattern_list_t *pattern_list = NULL; + + if (!this || !defrag || !data) + goto out; + + /* Get the pattern for pattern list. "pattern:" + * eg: *avi, *pdf:10MB, *:1TB + */ + pattern_str = strtok_r(data, ",", &tmp_str); + while (pattern_str) { + dup_str = gf_strdup(pattern_str); + pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1); + if (!pattern_list) { + goto out; } + pattern = strtok_r(dup_str, ":", &tmp_str1); + num = strtok_r(NULL, ":", &tmp_str1); + if (!pattern) + goto out; + if (!num) { + if (gf_string2bytesize_uint64(pattern, &pattern_list->size) == 0) { + pattern = "*"; + } + } else if (gf_string2bytesize_uint64(num, &pattern_list->size) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION, + "Invalid option. 
Defrag pattern:" + " Invalid number format \"%s\"", + num); + goto out; + } + memcpy(pattern_list->path_pattern, pattern, strlen(dup_str)); - dht_init_regex (this, options, "rsync-hash-regex", - &conf->rsync_regex, &conf->rsync_regex_valid, conf); - dht_init_regex (this, options, "extra-hash-regex", - &conf->extra_regex, &conf->extra_regex_valid, conf); + if (!defrag->defrag_pattern) + temp_list = NULL; + else + temp_list = defrag->defrag_pattern; - GF_OPTION_RECONF ("weighted-rebalance", conf->do_weighting, options, - bool, out); + pattern_list->next = temp_list; - GF_OPTION_RECONF ("use-readdirp", conf->use_readdirp, options, - bool, out); - ret = 0; -out: - return ret; -} + defrag->defrag_pattern = pattern_list; + pattern_list = NULL; -static int -gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data) -{ - int ret = -1; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *dup_str = NULL; - char *num = NULL; - char *pattern_str = NULL; - char *pattern = NULL; - gf_defrag_pattern_list_t *temp_list = NULL; - gf_defrag_pattern_list_t *pattern_list = NULL; - - if (!this || !defrag || !data) - goto out; + GF_FREE(dup_str); + dup_str = NULL; - /* Get the pattern for pattern list. "pattern:" - * eg: *avi, *pdf:10MB, *:1TB - */ - pattern_str = strtok_r (data, ",", &tmp_str); - while (pattern_str) { - dup_str = gf_strdup (pattern_str); - pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t), - 1); - if (!pattern_list) { - goto out; - } - pattern = strtok_r (dup_str, ":", &tmp_str1); - num = strtok_r (NULL, ":", &tmp_str1); - if (!pattern) - goto out; - if (!num) { - if (gf_string2bytesize_uint64(pattern, &pattern_list->size) - == 0) { - pattern = "*"; - } - } else if (gf_string2bytesize_uint64 (num, &pattern_list->size) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_OPTION, - "Invalid option. 
Defrag pattern:" - " Invalid number format \"%s\"", num); - goto out; - } - memcpy (pattern_list->path_pattern, pattern, strlen (dup_str)); - - if (!defrag->defrag_pattern) - temp_list = NULL; - else - temp_list = defrag->defrag_pattern; - - pattern_list->next = temp_list; - - defrag->defrag_pattern = pattern_list; - pattern_list = NULL; - - GF_FREE (dup_str); - dup_str = NULL; - - pattern_str = strtok_r (NULL, ",", &tmp_str); - } + pattern_str = strtok_r(NULL, ",", &tmp_str); + } - ret = 0; + ret = 0; out: - if (ret) - GF_FREE (pattern_list); - GF_FREE (dup_str); + if (ret) + GF_FREE(pattern_list); + GF_FREE(dup_str); - return ret; + return ret; } - - int -dht_init_methods (xlator_t *this) +dht_init_methods(xlator_t *this) { - int ret = -1; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; + int ret = -1; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; - GF_VALIDATE_OR_GOTO ("dht", this, err); + GF_VALIDATE_OR_GOTO("dht", this, err); - conf = this->private; - methods = &(conf->methods); + conf = this->private; + methods = &(conf->methods); - methods->migration_get_dst_subvol = dht_migration_get_dst_subvol; - methods->migration_needed = dht_migration_needed; - methods->migration_other = NULL; - methods->layout_search = dht_layout_search; + methods->migration_get_dst_subvol = dht_migration_get_dst_subvol; + methods->migration_needed = dht_migration_needed; + methods->migration_other = NULL; + methods->layout_search = dht_layout_search; - ret = 0; + ret = 0; err: - return ret; + return ret; } int -dht_init (xlator_t *this) +dht_init(xlator_t *this) { - dht_conf_t *conf = NULL; - char *temp_str = NULL; - int ret = -1; - int i = 0; - gf_defrag_info_t *defrag = NULL; - int cmd = 0; - char *node_uuid = NULL; - uint32_t commit_hash = 0; - - GF_VALIDATE_OR_GOTO ("dht", this, err); - - if (!this->children) { - gf_msg (this->name, GF_LOG_CRITICAL, 0, - DHT_MSG_INVALID_CONFIGURATION, - "Distribute needs more than one subvolume"); - return -1; - } - - if (!this->parents) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_INVALID_CONFIGURATION, - "dangling volume. 
check volfile"); - } - - conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t); - if (!conf) { - goto err; - } - - LOCK_INIT (&conf->subvolume_lock); - LOCK_INIT (&conf->layout_lock); - LOCK_INIT (&conf->lock); - synclock_init (&conf->link_lock, SYNC_LOCK_DEFAULT); - - /* We get the commit-hash to set only for rebalance process */ - if (dict_get_uint32 (this->options, - "commit-hash", &commit_hash) == 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_COMMIT_HASH_INFO, "%s using commit hash %u", - __func__, commit_hash); - conf->vol_commit_hash = commit_hash; - conf->vch_forced = _gf_true; - } - - ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd); - - if (cmd) { - defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t), - gf_defrag_info_mt); - - GF_VALIDATE_OR_GOTO (this->name, defrag, err); - - LOCK_INIT (&defrag->lock); - - defrag->is_exiting = 0; - - conf->defrag = defrag; - defrag->this = this; - - ret = dict_get_str (this->options, "node-uuid", &node_uuid); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_CONFIGURATION, - "Invalid volume configuration: " - "node-uuid not specified"); - goto err; - } + dht_conf_t *conf = NULL; + char *temp_str = NULL; + int ret = -1; + int i = 0; + gf_defrag_info_t *defrag = NULL; + int cmd = 0; + char *node_uuid = NULL; + uint32_t commit_hash = 0; + + GF_VALIDATE_OR_GOTO("dht", this, err); + + if (!this->children) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_CONFIGURATION, + "Distribute needs more than one subvolume"); + return -1; + } - if (gf_uuid_parse (node_uuid, defrag->node_uuid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_OPTION, "Invalid option:" - " Cannot parse glusterd node uuid"); - goto err; - } + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_CONFIGURATION, + "dangling volume. 
check volfile"); + } - defrag->cmd = cmd; + conf = GF_CALLOC(1, sizeof(*conf), gf_dht_mt_dht_conf_t); + if (!conf) { + goto err; + } - defrag->stats = _gf_false; + LOCK_INIT(&conf->subvolume_lock); + LOCK_INIT(&conf->layout_lock); + LOCK_INIT(&conf->lock); + synclock_init(&conf->link_lock, SYNC_LOCK_DEFAULT); - defrag->queue = NULL; + /* We get the commit-hash to set only for rebalance process */ + if (dict_get_uint32(this->options, "commit-hash", &commit_hash) == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_COMMIT_HASH_INFO, + "%s using commit hash %u", __func__, commit_hash); + conf->vol_commit_hash = commit_hash; + conf->vch_forced = _gf_true; + } - defrag->crawl_done = 0; + ret = dict_get_int32(this->options, "rebalance-cmd", &cmd); - defrag->global_error = 0; + if (cmd) { + defrag = GF_CALLOC(1, sizeof(gf_defrag_info_t), gf_defrag_info_mt); - defrag->q_entry_count = 0; + GF_VALIDATE_OR_GOTO(this->name, defrag, err); - defrag->wakeup_crawler = 0; + LOCK_INIT(&defrag->lock); - pthread_mutex_init (&defrag->dfq_mutex, 0); - pthread_cond_init (&defrag->parallel_migration_cond, 0); - pthread_cond_init (&defrag->rebalance_crawler_alarm, 0); - pthread_cond_init (&defrag->df_wakeup_thread, 0); + defrag->is_exiting = 0; - pthread_mutex_init (&defrag->fc_mutex, 0); - pthread_cond_init (&defrag->fc_wakeup_cond, 0); + conf->defrag = defrag; + defrag->this = this; - defrag->global_error = 0; + ret = dict_get_str(this->options, "node-uuid", &node_uuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_CONFIGURATION, + "Invalid volume configuration: " + "node-uuid not specified"); + goto err; } - conf->use_fallocate = 1; - - conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON; - if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) { - /* If option is not "auto", other options _should_ be boolean */ - if (strcasecmp (temp_str, "auto")) { - gf_boolean_t search_unhashed_bool; - ret = gf_string2boolean (temp_str, &search_unhashed_bool); - if (ret == -1) { - goto err; - } - conf->search_unhashed = search_unhashed_bool - ? 
GF_DHT_LOOKUP_UNHASHED_ON - : GF_DHT_LOOKUP_UNHASHED_OFF; - } - else { - conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO; - } + if (gf_uuid_parse(node_uuid, defrag->node_uuid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION, + "Invalid option:" + " Cannot parse glusterd node uuid"); + goto err; } - GF_OPTION_INIT ("lookup-optimize", conf->lookup_optimize, bool, err); + defrag->cmd = cmd; - GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool, - err); + defrag->stats = _gf_false; - GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err); + defrag->queue = NULL; - GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, - err); + defrag->crawl_done = 0; - GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent, - err); + defrag->global_error = 0; - conf->dir_spread_cnt = conf->subvolume_cnt; - GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt, - uint32, err); + defrag->q_entry_count = 0; - GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down, - bool, err); + defrag->wakeup_crawler = 0; - GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err); + pthread_mutex_init(&defrag->dfq_mutex, 0); + pthread_cond_init(&defrag->parallel_migration_cond, 0); + pthread_cond_init(&defrag->rebalance_crawler_alarm, 0); + pthread_cond_init(&defrag->df_wakeup_thread, 0); + pthread_mutex_init(&defrag->fc_mutex, 0); + pthread_cond_init(&defrag->fc_wakeup_cond, 0); - GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled, - bool, err); + defrag->global_error = 0; + } - GF_OPTION_INIT ("force-migration", conf->force_migration, - bool, err); + conf->use_fallocate = 1; + conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON; + if (dict_get_str(this->options, "lookup-unhashed", &temp_str) == 0) { + /* If option is not "auto", other options _should_ be boolean */ + if (strcasecmp(temp_str, "auto")) { + gf_boolean_t search_unhashed_bool; + ret = gf_string2boolean(temp_str, &search_unhashed_bool); + if (ret == -1) { + goto err; + } + conf->search_unhashed = search_unhashed_bool + ? 
GF_DHT_LOOKUP_UNHASHED_ON + : GF_DHT_LOOKUP_UNHASHED_OFF; + } else { + conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO; + } + } - if (defrag) { - defrag->lock_migration_enabled = conf->lock_migration_enabled; + GF_OPTION_INIT("lookup-optimize", conf->lookup_optimize, bool, err); - GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err); - if (dict_get_str (this->options, "rebalance-filter", &temp_str) - == 0) { - if (gf_defrag_pattern_list_fill (this, defrag, temp_str) - == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INVALID_OPTION, - "Invalid option:" - " Cannot parse rebalance-filter (%s)", - temp_str); + GF_OPTION_INIT("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool, err); - goto err; - } - } - } + GF_OPTION_INIT("use-readdirp", conf->use_readdirp, bool, err); - /* option can be any one of percent or bytes */ - conf->disk_unit = 0; - if (conf->min_free_disk < 100) - conf->disk_unit = 'p'; + GF_OPTION_INIT("min-free-disk", conf->min_free_disk, percent_or_size, err); - ret = dht_init_subvolumes (this, conf); - if (ret == -1) { - goto err; - } + GF_OPTION_INIT("min-free-inodes", conf->min_free_inodes, percent, err); - if (cmd) { - ret = dht_init_local_subvolumes (this, conf); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, - "dht_init_local_subvolumes failed"); - goto err; - } - } + conf->dir_spread_cnt = conf->subvolume_cnt; + GF_OPTION_INIT("directory-layout-spread", conf->dir_spread_cnt, uint32, + err); - if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) { - ret = dht_parse_decommissioned_bricks (this, conf, temp_str); - if (ret == -1) - goto err; - } + GF_OPTION_INIT("assert-no-child-down", conf->assert_no_child_down, bool, + err); - dht_init_regex (this, this->options, "rsync-hash-regex", - &conf->rsync_regex, &conf->rsync_regex_valid, conf); - dht_init_regex (this, this->options, "extra-hash-regex", - &conf->extra_regex, &conf->extra_regex_valid, conf); + GF_OPTION_INIT("readdir-optimize", conf->readdir_optimize, bool, err); - ret = dht_layouts_init (this, conf); - if (ret == -1) { - goto err; - } + GF_OPTION_INIT("lock-migration", conf->lock_migration_enabled, bool, err); + + GF_OPTION_INIT("force-migration", conf->force_migration, bool, err); + if (defrag) { + defrag->lock_migration_enabled = conf->lock_migration_enabled; - conf->gen = 1; + GF_OPTION_INIT("rebalance-stats", defrag->stats, bool, err); + if (dict_get_str(this->options, "rebalance-filter", &temp_str) == 0) { + if (gf_defrag_pattern_list_fill(this, defrag, temp_str) == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION, + "Invalid option:" + " Cannot parse rebalance-filter (%s)", + temp_str); - this->local_pool = mem_pool_new (dht_local_t, 512); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - " DHT initialisation failed. 
" - "failed to create local_t's memory pool"); goto err; + } } - - GF_OPTION_INIT ("randomize-hash-range-by-gfid", - conf->randomize_by_gfid, bool, err); - - if (defrag) { - GF_OPTION_INIT ("rebal-throttle", temp_str, str, err); - if (temp_str) { - ret = dht_configure_throttle (this, conf, temp_str); - if (ret == -1) - goto err; - } + } + + /* option can be any one of percent or bytes */ + conf->disk_unit = 0; + if (conf->min_free_disk < 100) + conf->disk_unit = 'p'; + + ret = dht_init_subvolumes(this, conf); + if (ret == -1) { + goto err; + } + + if (cmd) { + ret = dht_init_local_subvolumes(this, conf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, + "dht_init_local_subvolumes failed"); + goto err; } - - GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err); - gf_asprintf (&conf->mds_xattr_key, "%s."DHT_MDS_STR, conf->xattr_name); - gf_asprintf (&conf->link_xattr_name, "%s."DHT_LINKFILE_STR, - conf->xattr_name); - gf_asprintf (&conf->commithash_xattr_name, "%s."DHT_COMMITHASH_STR, - conf->xattr_name); - gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name); - if (!conf->link_xattr_name || !conf->wild_xattr_name) { + } + + if (dict_get_str(this->options, "decommissioned-bricks", &temp_str) == 0) { + ret = dht_parse_decommissioned_bricks(this, conf, temp_str); + if (ret == -1) + goto err; + } + + dht_init_regex(this, this->options, "rsync-hash-regex", &conf->rsync_regex, + &conf->rsync_regex_valid, conf); + dht_init_regex(this, this->options, "extra-hash-regex", &conf->extra_regex, + &conf->extra_regex_valid, conf); + + ret = dht_layouts_init(this, conf); + if (ret == -1) { + goto err; + } + + conf->gen = 1; + + this->local_pool = mem_pool_new(dht_local_t, 512); + if (!this->local_pool) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + " DHT initialisation failed. " + "failed to create local_t's memory pool"); + goto err; + } + + GF_OPTION_INIT("randomize-hash-range-by-gfid", conf->randomize_by_gfid, + bool, err); + + if (defrag) { + GF_OPTION_INIT("rebal-throttle", temp_str, str, err); + if (temp_str) { + ret = dht_configure_throttle(this, conf, temp_str); + if (ret == -1) goto err; } + } - GF_OPTION_INIT ("weighted-rebalance", conf->do_weighting, bool, err); + GF_OPTION_INIT("xattr-name", conf->xattr_name, str, err); + gf_asprintf(&conf->mds_xattr_key, "%s." DHT_MDS_STR, conf->xattr_name); + gf_asprintf(&conf->link_xattr_name, "%s." DHT_LINKFILE_STR, + conf->xattr_name); + gf_asprintf(&conf->commithash_xattr_name, "%s." 
DHT_COMMITHASH_STR, + conf->xattr_name); + gf_asprintf(&conf->wild_xattr_name, "%s*", conf->xattr_name); + if (!conf->link_xattr_name || !conf->wild_xattr_name) { + goto err; + } - conf->lock_pool = mem_pool_new (dht_lock_t, 512); - if (!conf->lock_pool) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED, - "failed to create lock mem_pool, failing " - "initialization"); - goto err; - } + GF_OPTION_INIT("weighted-rebalance", conf->do_weighting, bool, err); - this->private = conf; + conf->lock_pool = mem_pool_new(dht_lock_t, 512); + if (!conf->lock_pool) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED, + "failed to create lock mem_pool, failing " + "initialization"); + goto err; + } - if (dht_set_subvol_range(this)) - goto err; + this->private = conf; - if (dht_init_methods (this)) - goto err; + if (dht_set_subvol_range(this)) + goto err; - return 0; + if (dht_init_methods(this)) + goto err; + + return 0; err: - if (conf) { - if (conf->file_layouts) { - for (i = 0; i < conf->subvolume_cnt; i++) { - GF_FREE (conf->file_layouts[i]); - } - GF_FREE (conf->file_layouts); - } + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + GF_FREE(conf->file_layouts[i]); + } + GF_FREE(conf->file_layouts); + } - GF_FREE (conf->subvolumes); + GF_FREE(conf->subvolumes); - GF_FREE (conf->subvolume_status); + GF_FREE(conf->subvolume_status); - GF_FREE (conf->du_stats); + GF_FREE(conf->du_stats); - GF_FREE (conf->defrag); + GF_FREE(conf->defrag); - GF_FREE (conf->xattr_name); - GF_FREE (conf->link_xattr_name); - GF_FREE (conf->wild_xattr_name); - GF_FREE (conf->mds_xattr_key); + GF_FREE(conf->xattr_name); + GF_FREE(conf->link_xattr_name); + GF_FREE(conf->wild_xattr_name); + GF_FREE(conf->mds_xattr_key); - if (conf->lock_pool) - mem_pool_destroy (conf->lock_pool); + if (conf->lock_pool) + mem_pool_destroy(conf->lock_pool); - GF_FREE (conf); - } + GF_FREE(conf); + } - return -1; + return -1; } - struct volume_options options[] = { - { .key = {"lookup-unhashed"}, - .value = {"auto", "yes", "no", "enable", "disable", "1", "0", - "on", "off"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "on", - .description = "This option if set to ON, does a lookup through " - "all the sub-volumes, in case a lookup didn't return any result " - "from the hash subvolume. If set to OFF, it does not do a lookup " - "on the remaining subvolumes.", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - .level = OPT_STATUS_BASIC, - }, - { .key = {"lookup-optimize"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .description = "This option if set to ON enables the optimization " - "of -ve lookups, by not doing a lookup on non-hashed subvolumes for " - "files, in case the hashed subvolume does not return any result. 
" - "This option disregards the lookup-unhashed setting, when enabled.", - .op_version = {GD_OP_VERSION_3_7_2}, - .level = OPT_STATUS_ADVANCED, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"min-free-disk"}, - .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, - .default_value = "10%", - .description = "Percentage/Size of disk space, after which the " - "process starts balancing out the cluster, and logs will appear " - "in log files", - .op_version = {1}, - .level = OPT_STATUS_BASIC, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"min-free-inodes"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "5%", - .description = "after system has only N% of inodes, warnings " - "starts to appear in log files", - .op_version = {1}, - .level = OPT_STATUS_BASIC, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"unhashed-sticky-bit"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - { .key = {"use-readdirp"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .description = "This option if set to ON, forces the use of " - "readdirp, and hence also displays the stats of the files.", - .level = OPT_STATUS_ADVANCED, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"assert-no-child-down"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option if set to ON, in the event of " - "CHILD_DOWN, will call exit." - }, - { .key = {"directory-layout-spread"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .validate = GF_OPT_VALIDATE_MIN, - .description = "Specifies the directory layout spread. Takes number " - "of subvolumes as default value.", - - .op_version = {2}, - }, - { .key = {"decommissioned-bricks"}, - .type = GF_OPTION_TYPE_ANY, - .description = "This option if set to ON, decommissions " - "the brick, so that no new data is allowed to be created " - "on that brick.", - .level = OPT_STATUS_ADVANCED, - }, - { .key = {"rebalance-cmd"}, - .type = GF_OPTION_TYPE_INT, - }, - { .key = {"commit-hash"}, - .type = GF_OPTION_TYPE_INT, - }, - { .key = {"node-uuid"}, - .type = GF_OPTION_TYPE_STR, - }, - { .key = {"rebalance-stats"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option if set to ON displays and logs the " - " time taken for migration of each file, during the rebalance " - "process. If set to OFF, the rebalance logs will only display the " - "time spent in each directory.", - .op_version = {2}, - .level = OPT_STATUS_BASIC, - }, - { .key = {"readdir-optimize"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option if set to ON enables the optimization " - "that allows DHT to requests non-first subvolumes to filter out " - "directory entries.", - .op_version = {1}, - .level = OPT_STATUS_ADVANCED, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"rsync-hash-regex"}, - .type = GF_OPTION_TYPE_STR, - /* Setting a default here doesn't work. See dht_init_regex. */ - .description = "Regular expression for stripping temporary-file " - "suffix and prefix used by rsync, to prevent relocation when the " - "file is renamed.", - .op_version = {3}, - .level = OPT_STATUS_BASIC, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"extra-hash-regex"}, - .type = GF_OPTION_TYPE_STR, - /* Setting a default here doesn't work. See dht_init_regex. 
*/ - .description = "Regular expression for stripping temporary-file " - "suffix and prefix used by an application, to prevent relocation when " - "the file is renamed.", - .op_version = {3}, - .level = OPT_STATUS_BASIC, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"rebalance-filter"}, - .type = GF_OPTION_TYPE_STR, - }, - - { .key = {"xattr-name"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "trusted.glusterfs.dht", - .description = "Base for extended attributes used by this " - "translator instance, to avoid conflicts with others above or " - "below it.", - .op_version = {3}, - }, - - { .key = {"weighted-rebalance"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .description = "When enabled, files will be allocated to bricks " - "with a probability proportional to their size. Otherwise, all " - "bricks will have the same probability (legacy behavior).", - .op_version = {GD_OP_VERSION_3_6_0}, - .level = OPT_STATUS_BASIC, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - - /* NUFA option */ - { .key = {"local-volume-name"}, - .type = GF_OPTION_TYPE_XLATOR - }, - - /* tier options */ - { .key = {"tier-pause"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - - { .key = {"tier-promote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "120", - }, - - { .key = {"tier-demote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "3600", - }, - - { .key = {"write-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - - { .key = {"read-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { .key = {"watermark-hi"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "90", - }, - { .key = {"watermark-low"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "75", - }, - { .key = {"tier-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "test", - }, - { .key = {"tier-compact"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - { .key = {"tier-hot-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on hot tier in system" - }, - { .key = {"tier-cold-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on cold tier in system" - }, - { .key = {"tier-max-mb"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "4000", - }, - { .key = {"tier-max-promote-file-size"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { .key = {"tier-max-files"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "10000", - }, - { .key = {"tier-query-limit"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "100", - }, - /* switch option */ - { .key = {"pattern.switch.case"}, - .type = GF_OPTION_TYPE_ANY - }, - - { .key = {"randomize-hash-range-by-gfid"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Use gfid of directory to determine the subvolume " - "from which hash ranges are allocated starting with 0. " - "Note that we still use a directory/file's name to determine the " - "subvolume to which it hashes", - .op_version = {GD_OP_VERSION_3_6_0}, - }, - - { .key = {"rebal-throttle"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "normal", - .description = " Sets the maximum number of parallel file migrations " - "allowed on a node during the rebalance operation. 
The" - " default value is normal and allows a max of " - "[($(processing units) - 4) / 2), 2] files to be " - "migrated at a time. Lazy will allow only one file to " - "be migrated at a time and aggressive will allow " - "max of [($(processing units) - 4) / 2), 4]", - .op_version = {GD_OP_VERSION_3_7_0}, - .level = OPT_STATUS_BASIC, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - - }, - - { .key = {"lock-migration"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = " If enabled this feature will migrate the posix locks" - " associated with a file during rebalance", - .op_version = {GD_OP_VERSION_3_8_0}, - .level = OPT_STATUS_ADVANCED, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - - { .key = {"force-migration"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "If disabled, rebalance will not migrate files that " - "are being written to by an application", - .op_version = {GD_OP_VERSION_4_0_0}, - .level = OPT_STATUS_ADVANCED, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - - { .key = {NULL} }, + { + .key = {"lookup-unhashed"}, + .value = {"auto", "yes", "no", "enable", "disable", "1", "0", "on", + "off"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "on", + .description = + "This option if set to ON, does a lookup through " + "all the sub-volumes, in case a lookup didn't return any result " + "from the hash subvolume. If set to OFF, it does not do a lookup " + "on the remaining subvolumes.", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .level = OPT_STATUS_BASIC, + }, + {.key = {"lookup-optimize"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = + "This option if set to ON enables the optimization " + "of -ve lookups, by not doing a lookup on non-hashed subvolumes for " + "files, in case the hashed subvolume does not return any result. 
" + "This option disregards the lookup-unhashed setting, when enabled.", + .op_version = {GD_OP_VERSION_3_7_2}, + .level = OPT_STATUS_ADVANCED, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"min-free-disk"}, + .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, + .default_value = "10%", + .description = + "Percentage/Size of disk space, after which the " + "process starts balancing out the cluster, and logs will appear " + "in log files", + .op_version = {1}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"min-free-inodes"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "5%", + .description = "after system has only N% of inodes, warnings " + "starts to appear in log files", + .op_version = {1}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + { + .key = {"unhashed-sticky-bit"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, + {.key = {"use-readdirp"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "This option if set to ON, forces the use of " + "readdirp, and hence also displays the stats of the files.", + .level = OPT_STATUS_ADVANCED, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"assert-no-child-down"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option if set to ON, in the event of " + "CHILD_DOWN, will call exit."}, + { + .key = {"directory-layout-spread"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Specifies the directory layout spread. Takes number " + "of subvolumes as default value.", + + .op_version = {2}, + }, + { + .key = {"decommissioned-bricks"}, + .type = GF_OPTION_TYPE_ANY, + .description = + "This option if set to ON, decommissions " + "the brick, so that no new data is allowed to be created " + "on that brick.", + .level = OPT_STATUS_ADVANCED, + }, + { + .key = {"rebalance-cmd"}, + .type = GF_OPTION_TYPE_INT, + }, + { + .key = {"commit-hash"}, + .type = GF_OPTION_TYPE_INT, + }, + { + .key = {"node-uuid"}, + .type = GF_OPTION_TYPE_STR, + }, + { + .key = {"rebalance-stats"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = + "This option if set to ON displays and logs the " + " time taken for migration of each file, during the rebalance " + "process. If set to OFF, the rebalance logs will only display the " + "time spent in each directory.", + .op_version = {2}, + .level = OPT_STATUS_BASIC, + }, + {.key = {"readdir-optimize"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = + "This option if set to ON enables the optimization " + "that allows DHT to requests non-first subvolumes to filter out " + "directory entries.", + .op_version = {1}, + .level = OPT_STATUS_ADVANCED, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"rsync-hash-regex"}, + .type = GF_OPTION_TYPE_STR, + /* Setting a default here doesn't work. See dht_init_regex. */ + .description = + "Regular expression for stripping temporary-file " + "suffix and prefix used by rsync, to prevent relocation when the " + "file is renamed.", + .op_version = {3}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"extra-hash-regex"}, + .type = GF_OPTION_TYPE_STR, + /* Setting a default here doesn't work. See dht_init_regex. 
*/ + .description = + "Regular expression for stripping temporary-file " + "suffix and prefix used by an application, to prevent relocation when " + "the file is renamed.", + .op_version = {3}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + { + .key = {"rebalance-filter"}, + .type = GF_OPTION_TYPE_STR, + }, + + { + .key = {"xattr-name"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "trusted.glusterfs.dht", + .description = + "Base for extended attributes used by this " + "translator instance, to avoid conflicts with others above or " + "below it.", + .op_version = {3}, + }, + + {.key = {"weighted-rebalance"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = + "When enabled, files will be allocated to bricks " + "with a probability proportional to their size. Otherwise, all " + "bricks will have the same probability (legacy behavior).", + .op_version = {GD_OP_VERSION_3_6_0}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + + /* NUFA option */ + {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR}, + + /* tier options */ + { + .key = {"tier-pause"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, + + { + .key = {"tier-promote-frequency"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "120", + }, + + { + .key = {"tier-demote-frequency"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "3600", + }, + + { + .key = {"write-freq-threshold"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + }, + + { + .key = {"read-freq-threshold"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + }, + { + .key = {"watermark-hi"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "90", + }, + { + .key = {"watermark-low"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "75", + }, + { + .key = {"tier-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "test", + }, + { + .key = {"tier-compact"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, + {.key = {"tier-hot-compact-frequency"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "604800", + .description = "Frequency to compact DBs on hot tier in system"}, + {.key = {"tier-cold-compact-frequency"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "604800", + .description = "Frequency to compact DBs on cold tier in system"}, + { + .key = {"tier-max-mb"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "4000", + }, + { + .key = {"tier-max-promote-file-size"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + }, + { + .key = {"tier-max-files"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "10000", + }, + { + .key = {"tier-query-limit"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "100", + }, + /* switch option */ + {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY}, + + { + .key = {"randomize-hash-range-by-gfid"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = + "Use gfid of directory to determine the subvolume " + "from which hash ranges are allocated starting with 0. " + "Note that we still use a directory/file's name to determine the " + "subvolume to which it hashes", + .op_version = {GD_OP_VERSION_3_6_0}, + }, + + {.key = {"rebal-throttle"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "normal", + .description = " Sets the maximum number of parallel file migrations " + "allowed on a node during the rebalance operation. 
The" + " default value is normal and allows a max of " + "[($(processing units) - 4) / 2), 2] files to be " + "migrated at a time. Lazy will allow only one file to " + "be migrated at a time and aggressive will allow " + "max of [($(processing units) - 4) / 2), 4]", + .op_version = {GD_OP_VERSION_3_7_0}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC + + }, + + {.key = {"lock-migration"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = " If enabled this feature will migrate the posix locks" + " associated with a file during rebalance", + .op_version = {GD_OP_VERSION_3_8_0}, + .level = OPT_STATUS_ADVANCED, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + + {.key = {"force-migration"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "If disabled, rebalance will not migrate files that " + "are being written to by an application", + .op_version = {GD_OP_VERSION_4_0_0}, + .level = OPT_STATUS_ADVANCED, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + + {.key = {NULL}}, }; diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 90962636d18..677905f236e 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -8,78 +8,72 @@ cases as published by the Free Software Foundation. */ - #include "statedump.h" #include "dht-common.h" -class_methods_t class_methods = { - .init = dht_init, - .fini = dht_fini, - .reconfigure = dht_reconfigure, - .notify = dht_notify -}; +class_methods_t class_methods = {.init = dht_init, + .fini = dht_fini, + .reconfigure = dht_reconfigure, + .notify = dht_notify}; struct xlator_fops fops = { - .ipc = dht_ipc, - .lookup = dht_lookup, - .mknod = dht_mknod, - .create = dht_create, + .ipc = dht_ipc, + .lookup = dht_lookup, + .mknod = dht_mknod, + .create = dht_create, - .open = dht_open, - .statfs = dht_statfs, - .opendir = dht_opendir, - .readdir = dht_readdir, - .readdirp = dht_readdirp, - .fsyncdir = dht_fsyncdir, - .symlink = dht_symlink, - .unlink = dht_unlink, - .link = dht_link, - .mkdir = dht_mkdir, - .rmdir = dht_rmdir, - .rename = dht_rename, - .entrylk = dht_entrylk, - .fentrylk = dht_fentrylk, + .open = dht_open, + .statfs = dht_statfs, + .opendir = dht_opendir, + .readdir = dht_readdir, + .readdirp = dht_readdirp, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = dht_unlink, + .link = dht_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, - /* Inode read operations */ - .stat = dht_stat, - .fstat = dht_fstat, - .access = dht_access, - .readlink = dht_readlink, - .getxattr = dht_getxattr, - .fgetxattr = dht_fgetxattr, - .readv = dht_readv, - .flush = dht_flush, - .fsync = dht_fsync, - .inodelk = dht_inodelk, - .finodelk = dht_finodelk, - .lk = dht_lk, - .lease = dht_lease, + /* Inode read operations */ + .stat = dht_stat, + .fstat = dht_fstat, + .access = dht_access, + .readlink = dht_readlink, + .getxattr = dht_getxattr, + .fgetxattr = dht_fgetxattr, + .readv = dht_readv, + .flush = dht_flush, + .fsync = dht_fsync, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .lk = dht_lk, + .lease = dht_lease, - /* Inode write operations */ - .fremovexattr = dht_fremovexattr, - .removexattr = dht_removexattr, - .setxattr = dht_setxattr, - .fsetxattr = dht_fsetxattr, - .truncate = dht_truncate, - .ftruncate = dht_ftruncate, - .writev = dht_writev, - .xattrop = dht_xattrop, - .fxattrop = 
dht_fxattrop, - .setattr = dht_setattr, - .fsetattr = dht_fsetattr, - .fallocate = dht_fallocate, - .discard = dht_discard, - .zerofill = dht_zerofill, + /* Inode write operations */ + .fremovexattr = dht_fremovexattr, + .removexattr = dht_removexattr, + .setxattr = dht_setxattr, + .fsetxattr = dht_fsetxattr, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .writev = dht_writev, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, + .setattr = dht_setattr, + .fsetattr = dht_fsetattr, + .fallocate = dht_fallocate, + .discard = dht_discard, + .zerofill = dht_zerofill, }; struct xlator_dumpops dumpops = { - .priv = dht_priv_dump, - .inodectx = dht_inodectx_dump, + .priv = dht_priv_dump, + .inodectx = dht_inodectx_dump, }; - -struct xlator_cbks cbks = { - .release = dht_release, -// .releasedir = dht_releasedir, - .forget = dht_forget -}; +struct xlator_cbks cbks = {.release = dht_release, + // .releasedir = dht_releasedir, + .forget = dht_forget}; diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index afa446584ba..b8077f972d1 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "dht-common.h" /* TODO: all 'TODO's in dht.c holds good */ @@ -16,673 +15,633 @@ extern struct volume_options options[]; int -nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) +nufa_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) { - xlator_t *subvol = NULL; - char is_linkfile = 0; - char is_dir = 0; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - loc_t *loc = NULL; - int i = 0; - xlator_t *prev = NULL; - int call_cnt = 0; - int ret = 0; - - conf = this->private; - - prev = cookie; - local = frame->local; - loc = &local->loc; - - if (ENTRY_MISSING (op_ret, op_errno)) { - if (conf->search_unhashed) { - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, loc); - return 0; - } + xlator_t *subvol = NULL; + char is_linkfile = 0; + char is_dir = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + int i = 0; + xlator_t *prev = NULL; + int call_cnt = 0; + int ret = 0; + + conf = this->private; + + prev = cookie; + local = frame->local; + loc = &local->loc; + + if (ENTRY_MISSING(op_ret, op_errno)) { + if (conf->search_unhashed) { + local->op_errno = ENOENT; + dht_lookup_everywhere(frame, this, loc); + return 0; } - - if (op_ret == -1) - goto out; - - is_linkfile = check_is_linkfile (inode, stbuf, xattr, - conf->link_xattr_name); - is_dir = check_is_dir (inode, stbuf, xattr); - - if (!is_dir && !is_linkfile) { - /* non-directory and not a linkfile */ - ret = dht_layout_preset (this, prev, inode); - if (ret < 0) { - gf_msg_debug (this->name, 0, - "could not set pre-set layout for subvol" - " %s", prev->name); - op_ret = -1; - op_errno = EINVAL; - goto err; - } - - goto out; + } + + if (op_ret == -1) + goto out; + + is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name); + is_dir = check_is_dir(inode, stbuf, xattr); + + if (!is_dir && !is_linkfile) { + /* non-directory and not a linkfile */ + ret = dht_layout_preset(this, prev, inode); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "could not set pre-set layout for subvol" + " %s", + prev->name); + op_ret 
= -1; + op_errno = EINVAL; + goto err; } - if (is_dir) { - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; - - local->inode = inode_ref (inode); - local->xattr = dict_ref (xattr); - - local->op_ret = 0; - local->op_errno = 0; - - local->layout = dht_layout_new (this, conf->subvolume_cnt); - if (!local->layout) { - op_ret = -1; - op_errno = ENOMEM; - goto err; - } - - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_lookup_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } - } + goto out; + } - if (is_linkfile) { - subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + if (is_dir) { + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - if (!subvol) { - gf_msg_debug (this->name, 0, - "linkfile has no link subvolume. path=%s", - loc->path); - dht_lookup_everywhere (frame, this, loc); - return 0; - } + local->inode = inode_ref(inode); + local->xattr = dict_ref(xattr); - STACK_WIND_COOKIE (frame, dht_lookup_linkfile_cbk, subvol, - subvol, subvol->fops->lookup, - &local->loc, local->xattr_req); + local->op_ret = 0; + local->op_errno = 0; + + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + op_ret = -1; + op_errno = ENOMEM; + goto err; } - return 0; + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + local->xattr_req); + } + } -out: - if (!local->hashed_subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - local->loc.path); - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, loc); - return 0; + if (is_linkfile) { + subvol = dht_linkfile_subvol(this, inode, stbuf, xattr); + + if (!subvol) { + gf_msg_debug(this->name, 0, + "linkfile has no link subvolume. 
path=%s", loc->path); + dht_lookup_everywhere(frame, this, loc); + return 0; } - STACK_WIND_COOKIE (frame, dht_lookup_cbk, local->hashed_subvol, - local->hashed_subvol, - local->hashed_subvol->fops->lookup, - &local->loc, local->xattr_req); + STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol, + subvol->fops->lookup, &local->loc, local->xattr_req); + } + return 0; + +out: + if (!local->hashed_subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + local->loc.path); + local->op_errno = ENOENT; + dht_lookup_everywhere(frame, this, loc); return 0; + } + + STACK_WIND_COOKIE(frame, dht_lookup_cbk, local->hashed_subvol, + local->hashed_subvol, local->hashed_subvol->fops->lookup, + &local->loc, local->xattr_req); + + return 0; err: - DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, - inode, stbuf, xattr, postparent); - return 0; + DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr, + postparent); + return 0; } int -nufa_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) +nufa_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - xlator_t *hashed_subvol = NULL; - xlator_t *subvol = NULL; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - int op_errno = -1; - dht_layout_t *layout = NULL; - int i = 0; - int call_cnt = 0; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - conf = this->private; - - local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); - if (!local) { - op_errno = ENOMEM; - goto err; + xlator_t *hashed_subvol = NULL; + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + + conf = this->private; + + local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + if (xattr_req) { + local->xattr_req = dict_ref(xattr_req); + } else { + local->xattr_req = dict_new(); + } + + hashed_subvol = dht_subvol_get_hashed(this, &local->loc); + + local->hashed_subvol = hashed_subvol; + + if (is_revalidate(loc)) { + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, + "revalidate lookup without cache. " + "path=%s", + loc->path); + op_errno = EINVAL; + goto err; } - if (xattr_req) { - local->xattr_req = dict_ref (xattr_req); - } else { - local->xattr_req = dict_new (); + if (layout->gen && (layout->gen < conf->gen)) { + gf_msg_debug(this->name, 0, "incomplete layout failure for path=%s", + loc->path); + dht_layout_unref(this, local->layout); + goto do_fresh_lookup; } - hashed_subvol = dht_subvol_get_hashed (this, &local->loc); - - local->hashed_subvol = hashed_subvol; - - if (is_revalidate (loc)) { - layout = local->layout; - if (!layout) { - gf_msg_debug (this->name, 0, - "revalidate lookup without cache. 
" - "path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - if (layout->gen && (layout->gen < conf->gen)) { - gf_msg_debug (this->name, 0, - "incomplete layout failure for path=%s", - loc->path); - dht_layout_unref (this, local->layout); - goto do_fresh_lookup; - } - - local->inode = inode_ref (loc->inode); - - local->call_cnt = layout->cnt; - call_cnt = local->call_cnt; - - /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, - * revalidates directly go to the cached-subvolume. - */ - ret = dict_set_uint32 (local->xattr_req, - conf->xattr_name, 4 * 4); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dict value."); - op_errno = -1; - goto err; - } - - for (i = 0; i < layout->cnt; i++) { - subvol = layout->list[i].xlator; - - STACK_WIND_COOKIE (frame, dht_revalidate_cbk, subvol, - subvol, subvol->fops->lookup, - loc, local->xattr_req); - - if (!--call_cnt) - break; - } - } else { - do_fresh_lookup: - ret = dict_set_uint32 (local->xattr_req, - conf->xattr_name, 4 * 4); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dict value."); - op_errno = -1; - goto err; - } - - ret = dict_set_uint32 (local->xattr_req, - conf->link_xattr_name, 256); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dict value."); - op_errno = -1; - goto err; - } - - /* Send it to only local volume */ - STACK_WIND_COOKIE (frame, nufa_local_lookup_cbk, - ((xlator_t *)conf->private), - ((xlator_t *)conf->private), - ((xlator_t *)conf->private)->fops->lookup, - loc, local->xattr_req); - } + local->inode = inode_ref(loc->inode); - return 0; + local->call_cnt = layout->cnt; + call_cnt = local->call_cnt; -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, - NULL); - return 0; -} + /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, + * revalidates directly go to the cached-subvolume. 
+ */ + ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dict value."); + op_errno = -1; + goto err; + } -int -nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; - local = frame->local; + STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol, + subvol->fops->lookup, loc, local->xattr_req); - if (op_ret == -1) - goto err; + if (!--call_cnt) + break; + } + } else { + do_fresh_lookup: + ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dict value."); + op_errno = -1; + goto err; + } + + ret = dict_set_uint32(local->xattr_req, conf->link_xattr_name, 256); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dict value."); + op_errno = -1; + goto err; + } - STACK_WIND_COOKIE (frame, dht_create_cbk, local->cached_subvol, - local->cached_subvol, local->cached_subvol->fops->create, - &local->loc, local->flags, local->mode, local->umask, - local->fd, local->params); + /* Send it to only local volume */ + STACK_WIND_COOKIE( + frame, nufa_local_lookup_cbk, ((xlator_t *)conf->private), + ((xlator_t *)conf->private), + ((xlator_t *)conf->private)->fops->lookup, loc, local->xattr_req); + } - return 0; + return 0; err: - DHT_STACK_UNWIND (create, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL, NULL); - return 0; + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; } int -nufa_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *params) +nufa_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *subvol = NULL; - xlator_t *avail_subvol = NULL; - int op_errno = -1; + dht_local_t *local = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); + local = frame->local; - conf = this->private; + if (op_ret == -1) + goto err; - dht_get_du_info (frame, this, loc); + STACK_WIND_COOKIE(frame, dht_create_cbk, local->cached_subvol, + local->cached_subvol, local->cached_subvol->fops->create, + &local->loc, local->flags, local->mode, local->umask, + local->fd, local->params); - local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } + return 0; - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - loc->path); - op_errno = ENOENT; - goto err; - } - - avail_subvol = conf->private; - if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) { - avail_subvol = - dht_free_disk_available_subvol (this, - (xlator_t *)conf->private, - local); - } +err: + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} - if (subvol != avail_subvol) { - /* create a link file instead of actual file */ - local->params = dict_ref (params); - local->mode = mode; - local->flags = flags; - local->umask = umask; - local->cached_subvol = avail_subvol; - dht_linkfile_create (frame, nufa_create_linkfile_create_cbk, - this, avail_subvol, subvol, loc); - return 0; - } +int +nufa_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + xlator_t *avail_subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + conf = this->private; + + dht_get_du_info(frame, this, loc); + + local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = dht_subvol_get_hashed(this, loc); + if (!subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + avail_subvol = conf->private; + if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) { + avail_subvol = dht_free_disk_available_subvol( + this, (xlator_t *)conf->private, local); + } + + if (subvol != avail_subvol) { + /* create a link file instead of actual file */ + local->params = dict_ref(params); + local->mode = mode; + local->flags = flags; + local->umask = umask; + local->cached_subvol = avail_subvol; + dht_linkfile_create(frame, nufa_create_linkfile_create_cbk, this, + avail_subvol, subvol, loc); + return 0; + } - gf_msg_trace (this->name, 0, - "creating %s on %s", loc->path, subvol->name); + gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name); - STACK_WIND_COOKIE (frame, dht_create_cbk, subvol, - subvol, subvol->fops->create, - loc, flags, mode, umask, fd, 
params); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (create, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } int -nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +nufa_mknod_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; - if (!local || !local->cached_subvol) { - op_errno = EINVAL; - op_ret = -1; - goto err; - } + local = frame->local; + if (!local || !local->cached_subvol) { + op_errno = EINVAL; + op_ret = -1; + goto err; + } - if (op_ret >= 0) { - STACK_WIND_COOKIE (frame, dht_newfile_cbk, - (void *)local->cached_subvol, local->cached_subvol, - local->cached_subvol->fops->mknod, - &local->loc, local->mode, local->rdev, - local->umask, local->params); + if (op_ret >= 0) { + STACK_WIND_COOKIE( + frame, dht_newfile_cbk, (void *)local->cached_subvol, + local->cached_subvol, local->cached_subvol->fops->mknod, + &local->loc, local->mode, local->rdev, local->umask, local->params); - return 0; - } + return 0; + } err: - WIPE (postparent); - WIPE (preparent); + WIPE(postparent); + WIPE(preparent); - DHT_STACK_UNWIND (link, frame, op_ret, op_errno, - inode, stbuf, preparent, postparent, xdata); - return 0; + DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, xdata); + return 0; } - int -nufa_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params) +nufa_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *params) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *subvol = NULL; - xlator_t *avail_subvol = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - conf = this->private; - - dht_get_du_info (frame, this, loc); - - local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - loc->path); - op_errno = ENOENT; - goto err; - } - - /* Consider the disksize in consideration */ - avail_subvol = conf->private; - if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) { - avail_subvol = - dht_free_disk_available_subvol (this, - (xlator_t *)conf->private, - local); - } - - if (avail_subvol != subvol) { - /* Create linkfile first */ - - local->params = dict_ref (params); - local->mode = mode; - local->umask = umask; - local->rdev = rdev; - local->cached_subvol = avail_subvol; - - dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this, - avail_subvol, subvol, loc); - return 0; - } + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + xlator_t *avail_subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO(frame, err); 
+ VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + conf = this->private; + + dht_get_du_info(frame, this, loc); + + local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = dht_subvol_get_hashed(this, loc); + if (!subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + /* Consider the disksize in consideration */ + avail_subvol = conf->private; + if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) { + avail_subvol = dht_free_disk_available_subvol( + this, (xlator_t *)conf->private, local); + } + + if (avail_subvol != subvol) { + /* Create linkfile first */ + + local->params = dict_ref(params); + local->mode = mode; + local->umask = umask; + local->rdev = rdev; + local->cached_subvol = avail_subvol; + + dht_linkfile_create(frame, nufa_mknod_linkfile_cbk, this, avail_subvol, + subvol, loc); + return 0; + } - gf_msg_trace (this->name, 0, - "creating %s on %s", loc->path, subvol->name); + gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name); - STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol, - subvol->fops->mknod, loc, mode, rdev, umask, - params); + STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol, + subvol->fops->mknod, loc, mode, rdev, umask, params); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (mknod, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - gf_boolean_t -same_first_part (char *str1, char term1, char *str2, char term2) +same_first_part(char *str1, char term1, char *str2, char term2) { - gf_boolean_t ended1; - gf_boolean_t ended2; - - for (;;) { - ended1 = ((*str1 == '\0') || (*str1 == term1)); - ended2 = ((*str2 == '\0') || (*str2 == term2)); - if (ended1 && ended2) { - return _gf_true; - } - if (ended1 || ended2 || (*str1 != *str2)) { - return _gf_false; - } - ++str1; - ++str2; + gf_boolean_t ended1; + gf_boolean_t ended2; + + for (;;) { + ended1 = ((*str1 == '\0') || (*str1 == term1)); + ended2 = ((*str2 == '\0') || (*str2 == term2)); + if (ended1 && ended2) { + return _gf_true; } + if (ended1 || ended2 || (*str1 != *str2)) { + return _gf_false; + } + ++str1; + ++str2; + } } typedef struct nufa_args { - xlator_t *this; - char *volname; - gf_boolean_t addr_match; + xlator_t *this; + char *volname; + gf_boolean_t addr_match; } nufa_args_t; static void -nufa_find_local_brick (xlator_t *xl, void *data) +nufa_find_local_brick(xlator_t *xl, void *data) { - nufa_args_t *args = data; - xlator_t *this = args->this; - char *local_volname = args->volname; - gf_boolean_t addr_match = args->addr_match; - char *brick_host = NULL; - dht_conf_t *conf = this->private; - int ret = -1; - - /*This means a local subvol was already found. 
We pick the first brick - * that is local*/ - if (conf->private) - return; - - if (strcmp (xl->name, local_volname) == 0) { - conf->private = xl; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, - "Using specified subvol %s", - local_volname); - return; - } - - if (!addr_match) - return; - - ret = dict_get_str (xl->options, "remote-host", &brick_host); - if ((ret == 0) && - (gf_is_same_address (local_volname, brick_host) || - gf_is_local_addr (brick_host))) { - conf->private = xl; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, "Using the first local " - "subvol %s", xl->name); - return; - } - + nufa_args_t *args = data; + xlator_t *this = args->this; + char *local_volname = args->volname; + gf_boolean_t addr_match = args->addr_match; + char *brick_host = NULL; + dht_conf_t *conf = this->private; + int ret = -1; + + /*This means a local subvol was already found. We pick the first brick + * that is local*/ + if (conf->private) + return; + + if (strcmp(xl->name, local_volname) == 0) { + conf->private = xl; + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "Using specified subvol %s", local_volname); + return; + } + + if (!addr_match) + return; + + ret = dict_get_str(xl->options, "remote-host", &brick_host); + if ((ret == 0) && (gf_is_same_address(local_volname, brick_host) || + gf_is_local_addr(brick_host))) { + conf->private = xl; + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "Using the first local " + "subvol %s", + xl->name); + return; + } } static void -nufa_to_dht (xlator_t *this) +nufa_to_dht(xlator_t *this) { - GF_ASSERT (this); - GF_ASSERT (this->fops); + GF_ASSERT(this); + GF_ASSERT(this->fops); - this->fops->lookup = dht_lookup; - this->fops->create = dht_create; - this->fops->mknod = dht_mknod; + this->fops->lookup = dht_lookup; + this->fops->create = dht_create; + this->fops->mknod = dht_mknod; } int -nufa_find_local_subvol (xlator_t *this, - void (*fn) (xlator_t *each, void* data), void *data) +nufa_find_local_subvol(xlator_t *this, void (*fn)(xlator_t *each, void *data), + void *data) { - int ret = -1; - dht_conf_t *conf = this->private; - xlator_list_t *trav = NULL; - xlator_t *parent = NULL; - xlator_t *candidate = NULL; - - xlator_foreach_depth_first (this, fn, data); - if (!conf->private) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_BRICK_ERROR, "Couldn't find a local " - "brick"); - return -1; + int ret = -1; + dht_conf_t *conf = this->private; + xlator_list_t *trav = NULL; + xlator_t *parent = NULL; + xlator_t *candidate = NULL; + + xlator_foreach_depth_first(this, fn, data); + if (!conf->private) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_BRICK_ERROR, + "Couldn't find a local " + "brick"); + return -1; + } + + candidate = conf->private; + trav = candidate->parents; + while (trav) { + parent = trav->xlator; + if (strcmp(parent->type, "cluster/nufa") == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "Found local subvol, " + "%s", + candidate->name); + ret = 0; + conf->private = candidate; + break; } - candidate = conf->private; - trav = candidate->parents; - while (trav) { - - parent = trav->xlator; - if (strcmp (parent->type, "cluster/nufa") == 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, "Found local subvol, " - "%s", candidate->name); - ret = 0; - conf->private = candidate; - break; - } - - candidate = parent; - trav = parent->parents; - } + candidate = parent; + trav = parent->parents; + } - return ret; + return ret; } int -nufa_init (xlator_t *this) +nufa_init(xlator_t 
*this) { - data_t *data = NULL; - char *local_volname = NULL; - int ret = -1; - char my_hostname[256]; - gf_boolean_t addr_match = _gf_false; - nufa_args_t args = {0, }; - - ret = dht_init(this); - if (ret) { - return ret; - } - - if ((data = dict_get (this->options, "local-volume-name"))) { - local_volname = data->data; - - } else { - addr_match = _gf_true; - local_volname = "localhost"; - ret = gethostname (my_hostname, 256); - if (ret == 0) - local_volname = my_hostname; - - else - gf_msg (this->name, GF_LOG_WARNING, errno, - DHT_MSG_GET_HOSTNAME_FAILED, - "could not find hostname"); - - } - - args.this = this; - args.volname = local_volname; - args.addr_match = addr_match; - ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args); - if (ret) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_SUBVOL_INFO, - "Unable to find local subvolume, switching " - "to dht mode"); - nufa_to_dht (this); - } - return 0; + data_t *data = NULL; + char *local_volname = NULL; + int ret = -1; + char my_hostname[256]; + gf_boolean_t addr_match = _gf_false; + nufa_args_t args = { + 0, + }; + + ret = dht_init(this); + if (ret) { + return ret; + } + + if ((data = dict_get(this->options, "local-volume-name"))) { + local_volname = data->data; + + } else { + addr_match = _gf_true; + local_volname = "localhost"; + ret = gethostname(my_hostname, 256); + if (ret == 0) + local_volname = my_hostname; + + else + gf_msg(this->name, GF_LOG_WARNING, errno, + DHT_MSG_GET_HOSTNAME_FAILED, "could not find hostname"); + } + + args.this = this; + args.volname = local_volname; + args.addr_match = addr_match; + ret = nufa_find_local_subvol(this, nufa_find_local_brick, &args); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "Unable to find local subvolume, switching " + "to dht mode"); + nufa_to_dht(this); + } + return 0; } dht_methods_t dht_methods = { - .migration_get_dst_subvol = dht_migration_get_dst_subvol, - .migration_needed = dht_migration_needed, - .layout_search = dht_layout_search, -}; - -class_methods_t class_methods = { - .init = nufa_init, - .fini = dht_fini, - .reconfigure = dht_reconfigure, - .notify = dht_notify + .migration_get_dst_subvol = dht_migration_get_dst_subvol, + .migration_needed = dht_migration_needed, + .layout_search = dht_layout_search, }; +class_methods_t class_methods = {.init = nufa_init, + .fini = dht_fini, + .reconfigure = dht_reconfigure, + .notify = dht_notify}; struct xlator_fops fops = { - .lookup = nufa_lookup, - .create = nufa_create, - .mknod = nufa_mknod, - - .stat = dht_stat, - .fstat = dht_fstat, - .truncate = dht_truncate, - .ftruncate = dht_ftruncate, - .access = dht_access, - .readlink = dht_readlink, - .setxattr = dht_setxattr, - .getxattr = dht_getxattr, - .removexattr = dht_removexattr, - .open = dht_open, - .readv = dht_readv, - .writev = dht_writev, - .flush = dht_flush, - .fsync = dht_fsync, - .statfs = dht_statfs, - .lk = dht_lk, - .opendir = dht_opendir, - .readdir = dht_readdir, - .readdirp = dht_readdirp, - .fsyncdir = dht_fsyncdir, - .symlink = dht_symlink, - .unlink = dht_unlink, - .link = dht_link, - .mkdir = dht_mkdir, - .rmdir = dht_rmdir, - .rename = dht_rename, - .inodelk = dht_inodelk, - .finodelk = dht_finodelk, - .entrylk = dht_entrylk, - .fentrylk = dht_fentrylk, - .xattrop = dht_xattrop, - .fxattrop = dht_fxattrop, - .setattr = dht_setattr, + .lookup = nufa_lookup, + .create = nufa_create, + .mknod = nufa_mknod, + + .stat = dht_stat, + .fstat = dht_fstat, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .access = 
dht_access, + .readlink = dht_readlink, + .setxattr = dht_setxattr, + .getxattr = dht_getxattr, + .removexattr = dht_removexattr, + .open = dht_open, + .readv = dht_readv, + .writev = dht_writev, + .flush = dht_flush, + .fsync = dht_fsync, + .statfs = dht_statfs, + .lk = dht_lk, + .opendir = dht_opendir, + .readdir = dht_readdir, + .readdirp = dht_readdirp, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = dht_unlink, + .link = dht_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, + .setattr = dht_setattr, }; - -struct xlator_cbks cbks = { - .forget = dht_forget -}; +struct xlator_cbks cbks = {.forget = dht_forget}; diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index 8b66a09b822..ca9bfce5a8e 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "dht-common.h" #include "dht-mem-types.h" @@ -20,907 +19,853 @@ extern struct volume_options options[]; struct switch_sched_array { - xlator_t *xl; - int32_t eligible; - int32_t considered; + xlator_t *xl; + int32_t eligible; + int32_t considered; }; /* Select one of this struct based on the path's pattern match */ struct switch_struct { - struct switch_struct *next; - struct switch_sched_array *array; - int32_t node_index; /* Index of the node in - this pattern. */ - int32_t num_child; /* Total num of child nodes - with this pattern. */ - char path_pattern[256]; + struct switch_struct *next; + struct switch_sched_array *array; + int32_t node_index; /* Index of the node in + this pattern. */ + int32_t num_child; /* Total num of child nodes + with this pattern. 
*/ + char path_pattern[256]; }; /* TODO: all 'TODO's in dht.c holds good */ /* This function should return child node as '*:subvolumes' is inserterd */ static int32_t -gf_switch_valid_child (xlator_t *this, const char *child) +gf_switch_valid_child(xlator_t *this, const char *child) { - xlator_list_t *children = NULL; - int32_t ret = 0; - - children = this->children; - while (children) { - if (!strcmp (child, children->xlator->name)) { - ret = 1; - break; - } - children = children->next; + xlator_list_t *children = NULL; + int32_t ret = 0; + + children = this->children; + while (children) { + if (!strcmp(child, children->xlator->name)) { + ret = 1; + break; } + children = children->next; + } - return ret; + return ret; } static xlator_t * -get_switch_matching_subvol (const char *path, dht_conf_t *conf, - xlator_t *hashed_subvol) +get_switch_matching_subvol(const char *path, dht_conf_t *conf, + xlator_t *hashed_subvol) { - struct switch_struct *cond = NULL; - struct switch_struct *trav = NULL; - char *pathname = NULL; - int idx = 0; - xlator_t *subvol = NULL; - - cond = conf->private; - subvol = hashed_subvol; - if (!cond) - goto out; - - pathname = gf_strdup (path); - if (!pathname) - goto out; - - trav = cond; - while (trav) { - if (fnmatch (trav->path_pattern, - pathname, FNM_NOESCAPE) == 0) { - for (idx = 0; idx < trav->num_child; idx++) { - if (trav->array[idx].xl == hashed_subvol) - goto out; - } - idx = trav->node_index++; - trav->node_index %= trav->num_child; - subvol = trav->array[idx].xl; - goto out; - } - trav = trav->next; + struct switch_struct *cond = NULL; + struct switch_struct *trav = NULL; + char *pathname = NULL; + int idx = 0; + xlator_t *subvol = NULL; + + cond = conf->private; + subvol = hashed_subvol; + if (!cond) + goto out; + + pathname = gf_strdup(path); + if (!pathname) + goto out; + + trav = cond; + while (trav) { + if (fnmatch(trav->path_pattern, pathname, FNM_NOESCAPE) == 0) { + for (idx = 0; idx < trav->num_child; idx++) { + if (trav->array[idx].xl == hashed_subvol) + goto out; + } + idx = trav->node_index++; + trav->node_index %= trav->num_child; + subvol = trav->array[idx].xl; + goto out; } + trav = trav->next; + } out: - GF_FREE (pathname); + GF_FREE(pathname); - return subvol; + return subvol; } - int -switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt *postparent) +switch_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) { - xlator_t *subvol = NULL; - char is_linkfile = 0; - char is_dir = 0; - dht_conf_t *conf = NULL; - dht_local_t *local = NULL; - loc_t *loc = NULL; - int i = 0; - xlator_t *prev = NULL; - int call_cnt = 0; - int ret = 0; - - conf = this->private; - - prev = cookie; - local = frame->local; - loc = &local->loc; - - if (ENTRY_MISSING (op_ret, op_errno)) { - if (conf->search_unhashed) { - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, loc); - return 0; - } + xlator_t *subvol = NULL; + char is_linkfile = 0; + char is_dir = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + int i = 0; + xlator_t *prev = NULL; + int call_cnt = 0; + int ret = 0; + + conf = this->private; + + prev = cookie; + local = frame->local; + loc = &local->loc; + + if (ENTRY_MISSING(op_ret, op_errno)) { + if (conf->search_unhashed) { + local->op_errno = ENOENT; + 
dht_lookup_everywhere(frame, this, loc); + return 0; } - - if (op_ret == -1) - goto out; - - is_linkfile = check_is_linkfile (inode, stbuf, xattr, - conf->link_xattr_name); - is_dir = check_is_dir (inode, stbuf, xattr); - - if (!is_dir && !is_linkfile) { - /* non-directory and not a linkfile */ - - ret = dht_layout_preset (this, prev, inode); - if (ret < 0) { - gf_msg_debug (this->name, 0, - "could not set pre-set layout " - "for subvol %s", - prev->name); - op_ret = -1; - op_errno = EINVAL; - goto err; - } - - goto out; + } + + if (op_ret == -1) + goto out; + + is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name); + is_dir = check_is_dir(inode, stbuf, xattr); + + if (!is_dir && !is_linkfile) { + /* non-directory and not a linkfile */ + + ret = dht_layout_preset(this, prev, inode); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "could not set pre-set layout " + "for subvol %s", + prev->name); + op_ret = -1; + op_errno = EINVAL; + goto err; } - if (is_dir) { - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; + goto out; + } - local->inode = inode_ref (inode); - local->xattr = dict_ref (xattr); + if (is_dir) { + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; - local->op_ret = 0; - local->op_errno = 0; + local->inode = inode_ref(inode); + local->xattr = dict_ref(xattr); - local->layout = dht_layout_new (this, conf->subvolume_cnt); - if (!local->layout) { - op_ret = -1; - op_errno = ENOMEM; - gf_msg_debug (this->name, 0, - "memory allocation failed :("); - goto err; - } + local->op_ret = 0; + local->op_errno = 0; - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_lookup_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg_debug(this->name, 0, "memory allocation failed :("); + goto err; } - if (is_linkfile) { - subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, &local->loc, + local->xattr_req); + } + } - if (!subvol) { - gf_msg_debug (this->name, 0, - "linkfile has no link subvolume.path=%s", - loc->path); - dht_lookup_everywhere (frame, this, loc); - return 0; - } + if (is_linkfile) { + subvol = dht_linkfile_subvol(this, inode, stbuf, xattr); - STACK_WIND_COOKIE (frame, dht_lookup_linkfile_cbk, subvol, - subvol, subvol->fops->lookup, - &local->loc, local->xattr_req); + if (!subvol) { + gf_msg_debug(this->name, 0, + "linkfile has no link subvolume.path=%s", loc->path); + dht_lookup_everywhere(frame, this, loc); + return 0; } - return 0; + STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol, + subvol->fops->lookup, &local->loc, local->xattr_req); + } + + return 0; out: - if (!local->hashed_subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - local->loc.path); - local->op_errno = ENOENT; - dht_lookup_everywhere (frame, this, loc); - return 0; - } + if (!local->hashed_subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + local->loc.path); + local->op_errno = ENOENT; + dht_lookup_everywhere(frame, this, loc); + return 0; + } - STACK_WIND_COOKIE (frame, dht_lookup_cbk, local->hashed_subvol, - local->hashed_subvol, - local->hashed_subvol->fops->lookup, - &local->loc, local->xattr_req); + 
STACK_WIND_COOKIE(frame, dht_lookup_cbk, local->hashed_subvol, + local->hashed_subvol, local->hashed_subvol->fops->lookup, + &local->loc, local->xattr_req); - return 0; + return 0; err: - DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, - inode, stbuf, xattr, NULL); - return 0; + DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr, + NULL); + return 0; } int -switch_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) +switch_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) { - xlator_t *hashed_subvol = NULL; - xlator_t *cached_subvol = NULL; - xlator_t *subvol = NULL; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - int op_errno = -1; - dht_layout_t *layout = NULL; - int i = 0; - int call_cnt = 0; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - conf = this->private; - - local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); - if (!local) { - op_errno = ENOMEM; - goto err; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + VALIDATE_OR_GOTO(loc->path, err); + + conf = this->private; + + local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + if (xattr_req) { + local->xattr_req = dict_ref(xattr_req); + } else { + local->xattr_req = dict_new(); + } + + hashed_subvol = dht_subvol_get_hashed(this, &local->loc); + cached_subvol = local->cached_subvol; + + local->hashed_subvol = hashed_subvol; + + if (is_revalidate(loc)) { + layout = local->layout; + if (!layout) { + gf_msg_debug(this->name, 0, + "revalidate lookup without cache. path=%s", loc->path); + op_errno = EINVAL; + goto err; } - if (xattr_req) { - local->xattr_req = dict_ref (xattr_req); - } else { - local->xattr_req = dict_new (); + if (layout->gen && (layout->gen < conf->gen)) { + gf_msg_debug(this->name, 0, "incomplete layout failure for path=%s", + loc->path); + dht_layout_unref(this, local->layout); + goto do_fresh_lookup; } - hashed_subvol = dht_subvol_get_hashed (this, &local->loc); - cached_subvol = local->cached_subvol; + local->inode = inode_ref(loc->inode); - local->hashed_subvol = hashed_subvol; + local->call_cnt = layout->cnt; + call_cnt = local->call_cnt; - if (is_revalidate (loc)) { - layout = local->layout; - if (!layout) { - gf_msg_debug(this->name, 0, - "revalidate lookup without cache. path=%s", - loc->path); - op_errno = EINVAL; - goto err; - } - - if (layout->gen && (layout->gen < conf->gen)) { - gf_msg_debug (this->name, 0, - "incomplete layout failure for path=%s", - loc->path); - dht_layout_unref (this, local->layout); - goto do_fresh_lookup; - } - - local->inode = inode_ref (loc->inode); - - local->call_cnt = layout->cnt; - call_cnt = local->call_cnt; + /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' + * attribute, revalidates directly go to the cached-subvolume. 
+ */ + ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4); + if (ret < 0) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "failed to set dict value for %s", conf->xattr_name); - /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' - * attribute, revalidates directly go to the cached-subvolume. - */ - ret = dict_set_uint32 (local->xattr_req, - conf->xattr_name, 4 * 4); - if (ret < 0) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "failed to set dict value for %s", - conf->xattr_name); + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; - for (i = 0; i < layout->cnt; i++) { - subvol = layout->list[i].xlator; + STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol, + subvol->fops->lookup, loc, local->xattr_req); - STACK_WIND_COOKIE (frame, dht_revalidate_cbk, subvol, - subvol, subvol->fops->lookup, - loc, local->xattr_req); + if (!--call_cnt) + break; + } + } else { + do_fresh_lookup: + ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4); + if (ret < 0) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "failed to set dict value for %s", conf->xattr_name); + + ret = dict_set_uint32(local->xattr_req, conf->link_xattr_name, 256); + if (ret < 0) + gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED, + "failed to set dict value for %s", conf->link_xattr_name); + + if (!hashed_subvol) { + gf_msg_debug(this->name, 0, + "no subvolume in layout for path=%s, " + "checking on all the subvols to see if " + "it is a directory", + loc->path); + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new(this, conf->subvolume_cnt); + if (!local->layout) { + op_errno = ENOMEM; + goto err; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, + conf->subvolumes[i], conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + return 0; + } - if (!--call_cnt) - break; - } + /* */ + cached_subvol = get_switch_matching_subvol(loc->path, conf, + hashed_subvol); + if (cached_subvol == hashed_subvol) { + STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, + hashed_subvol, hashed_subvol->fops->lookup, loc, + local->xattr_req); } else { - do_fresh_lookup: - ret = dict_set_uint32 (local->xattr_req, - conf->xattr_name, 4 * 4); - if (ret < 0) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "failed to set dict value for %s", - conf->xattr_name); - - ret = dict_set_uint32 (local->xattr_req, - conf->link_xattr_name, 256); - if (ret < 0) - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - DHT_MSG_DICT_SET_FAILED, - "failed to set dict value for %s", - conf->link_xattr_name); - - if (!hashed_subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s, " - "checking on all the subvols to see if " - "it is a directory", loc->path); - call_cnt = conf->subvolume_cnt; - local->call_cnt = call_cnt; - - local->layout = dht_layout_new (this, - conf->subvolume_cnt); - if (!local->layout) { - op_errno = ENOMEM; - goto err; - } - - for (i = 0; i < call_cnt; i++) { - STACK_WIND_COOKIE (frame, dht_lookup_dir_cbk, - conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->lookup, - &local->loc, local->xattr_req); - } - return 0; - } - - /* */ - cached_subvol = get_switch_matching_subvol (loc->path, conf, - hashed_subvol); - if (cached_subvol == hashed_subvol) { - STACK_WIND_COOKIE (frame, dht_lookup_cbk, hashed_subvol, - hashed_subvol, - 
hashed_subvol->fops->lookup, - loc, local->xattr_req); - } else { - STACK_WIND_COOKIE (frame, switch_local_lookup_cbk, - cached_subvol, cached_subvol, - cached_subvol->fops->lookup, - loc, local->xattr_req); - } + STACK_WIND_COOKIE(frame, switch_local_lookup_cbk, cached_subvol, + cached_subvol, cached_subvol->fops->lookup, loc, + local->xattr_req); } + } - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lookup, frame, -1, op_errno, - NULL, NULL, NULL, NULL); - return 0; + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; } int -switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +switch_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret == -1) - goto err; + if (op_ret == -1) + goto err; - STACK_WIND_COOKIE (frame, dht_create_cbk, local->cached_subvol, - local->cached_subvol, local->cached_subvol->fops->create, - &local->loc, local->flags, local->mode, local->umask, - local->fd, local->params); + STACK_WIND_COOKIE(frame, dht_create_cbk, local->cached_subvol, + local->cached_subvol, local->cached_subvol->fops->create, + &local->loc, local->flags, local->mode, local->umask, + local->fd, local->params); - return 0; + return 0; err: - DHT_STACK_UNWIND (create, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; } int -switch_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *params) +switch_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *subvol = NULL; - xlator_t *avail_subvol = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - conf = this->private; - - dht_get_du_info (frame, this, loc); - - local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - loc->path); - op_errno = ENOENT; - goto err; - } - - avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol); - if (dht_is_subvol_filled (this, avail_subvol)) { - avail_subvol = - dht_free_disk_available_subvol (this, avail_subvol, - local); - } - - if (subvol != avail_subvol) { - /* create a link file instead of actual file */ - local->mode = mode; - local->flags = flags; - local->umask = umask; - local->cached_subvol = avail_subvol; - dht_linkfile_create (frame, switch_create_linkfile_create_cbk, - this, avail_subvol, subvol, loc); - return 0; - } + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + xlator_t *avail_subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, 
err); + + conf = this->private; + + dht_get_du_info(frame, this, loc); + + local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = dht_subvol_get_hashed(this, loc); + if (!subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + avail_subvol = get_switch_matching_subvol(loc->path, conf, subvol); + if (dht_is_subvol_filled(this, avail_subvol)) { + avail_subvol = dht_free_disk_available_subvol(this, avail_subvol, + local); + } + + if (subvol != avail_subvol) { + /* create a link file instead of actual file */ + local->mode = mode; + local->flags = flags; + local->umask = umask; + local->cached_subvol = avail_subvol; + dht_linkfile_create(frame, switch_create_linkfile_create_cbk, this, + avail_subvol, subvol, loc); + return 0; + } - gf_msg_trace (this->name, 0, - "creating %s on %s", loc->path, subvol->name); + gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name); - STACK_WIND_COOKIE (frame, dht_create_cbk, subvol, subvol, - subvol->fops->create, loc, flags, mode, umask, fd, - params); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (create, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } int -switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +switch_mknod_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; - local = frame->local; - if (!local || !local->cached_subvol) { - op_errno = EINVAL; - op_ret = -1; - goto err; - } + local = frame->local; + if (!local || !local->cached_subvol) { + op_errno = EINVAL; + op_ret = -1; + goto err; + } - if (op_ret >= 0) { - STACK_WIND_COOKIE (frame, dht_newfile_cbk, - (void *)local->cached_subvol, local->cached_subvol, - local->cached_subvol->fops->mknod, - &local->loc, local->mode, local->rdev, - local->umask, local->params); + if (op_ret >= 0) { + STACK_WIND_COOKIE( + frame, dht_newfile_cbk, (void *)local->cached_subvol, + local->cached_subvol, local->cached_subvol->fops->mknod, + &local->loc, local->mode, local->rdev, local->umask, local->params); - return 0; - } -err: - DHT_STACK_UNWIND (link, frame, op_ret, op_errno, - inode, stbuf, preparent, postparent, xdata); return 0; + } +err: + DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, xdata); + return 0; } - int -switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *params) +switch_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *params) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *subvol = NULL; - xlator_t *avail_subvol = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - conf = this->private; - - 
dht_get_du_info (frame, this, loc); - - local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - subvol = dht_subvol_get_hashed (this, loc); - if (!subvol) { - gf_msg_debug (this->name, 0, - "no subvolume in layout for path=%s", - loc->path); - op_errno = ENOENT; - goto err; - } - - /* Consider the disksize in consideration */ - avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol); - if (dht_is_subvol_filled (this, avail_subvol)) { - avail_subvol = - dht_free_disk_available_subvol (this, avail_subvol, - local); - } - - if (avail_subvol != subvol) { - /* Create linkfile first */ - - local->params = dict_ref (params); - local->mode = mode; - local->umask = umask; - local->rdev = rdev; - local->cached_subvol = avail_subvol; - - dht_linkfile_create (frame, switch_mknod_linkfile_cbk, - this, avail_subvol, subvol, loc); - return 0; - } + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + xlator_t *avail_subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + conf = this->private; + + dht_get_du_info(frame, this, loc); + + local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = dht_subvol_get_hashed(this, loc); + if (!subvol) { + gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + /* Consider the disksize in consideration */ + avail_subvol = get_switch_matching_subvol(loc->path, conf, subvol); + if (dht_is_subvol_filled(this, avail_subvol)) { + avail_subvol = dht_free_disk_available_subvol(this, avail_subvol, + local); + } + + if (avail_subvol != subvol) { + /* Create linkfile first */ + + local->params = dict_ref(params); + local->mode = mode; + local->umask = umask; + local->rdev = rdev; + local->cached_subvol = avail_subvol; + + dht_linkfile_create(frame, switch_mknod_linkfile_cbk, this, + avail_subvol, subvol, loc); + return 0; + } - gf_msg_trace (this->name, 0, - "creating %s on %s", loc->path, subvol->name); + gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name); - STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol, - subvol->fops->mknod, loc, mode, rdev, umask, - params); + STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol, + subvol->fops->mknod, loc, mode, rdev, umask, params); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (mknod, frame, -1, op_errno, - NULL, NULL, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - void -switch_fini (xlator_t *this) +switch_fini(xlator_t *this) { - dht_conf_t *conf = NULL; - struct switch_struct *trav = NULL; - struct switch_struct *prev = NULL; - - conf = this->private; - - if (conf) { - trav = (struct switch_struct *)conf->private; - conf->private = NULL; - while (trav) { - GF_FREE (trav->array); - prev = trav; - trav = trav->next; - GF_FREE (prev); - } + dht_conf_t *conf = NULL; + struct switch_struct *trav = NULL; + struct switch_struct *prev = NULL; + + conf = this->private; + + if (conf) { + trav = (struct switch_struct *)conf->private; + conf->private = NULL; + while (trav) { + GF_FREE(trav->array); + prev = trav; + trav = trav->next; + GF_FREE(prev); } + } - dht_fini(this); + dht_fini(this); } int -set_switch_pattern (xlator_t *this, dht_conf_t *conf, - const char *pattern_str) +set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str) { - int flag = 0; - int idx = 0; - int index = 0; - int child_count = 0; - char *tmp = NULL; - char *tmp1 = NULL; - char *child = NULL; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *dup_str = NULL; - char *dup_childs = NULL; - char *switch_str = NULL; - char *pattern = NULL; - char *childs = NULL; - char *option_string = NULL; - size_t pattern_length; - struct switch_struct *switch_buf = NULL; - struct switch_struct *switch_opt = NULL; - struct switch_struct *trav = NULL; - struct switch_sched_array *switch_buf_array = NULL; - xlator_list_t *trav_xl = NULL; - - trav_xl = this->children; - while (trav_xl) { - index++; - trav_xl = trav_xl->next; + int flag = 0; + int idx = 0; + int index = 0; + int child_count = 0; + char *tmp = NULL; + char *tmp1 = NULL; + char *child = NULL; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *dup_str = NULL; + char *dup_childs = NULL; + char *switch_str = NULL; + char *pattern = NULL; + char *childs = NULL; + char *option_string = NULL; + size_t pattern_length; + struct switch_struct *switch_buf = NULL; + struct switch_struct *switch_opt = NULL; + struct switch_struct *trav = NULL; + struct switch_sched_array *switch_buf_array = NULL; + xlator_list_t *trav_xl = NULL; + + trav_xl = this->children; + while (trav_xl) { + index++; + trav_xl = trav_xl->next; + } + child_count = index; + switch_buf_array = GF_CALLOC((index + 1), sizeof(struct switch_sched_array), + gf_switch_mt_switch_sched_array); + if (!switch_buf_array) + goto err; + + trav_xl = this->children; + index = 0; + + while (trav_xl) { + switch_buf_array[index].xl = trav_xl->xlator; + switch_buf_array[index].eligible = 1; + trav_xl = trav_xl->next; + index++; + } + + /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */ + + /* Get the pattern for considering switch case. 
+ "option block-size *avi:10MB" etc */ + option_string = gf_strdup(pattern_str); + switch_str = strtok_r(option_string, ";", &tmp_str); + while (switch_str) { + dup_str = gf_strdup(switch_str); + switch_opt = GF_CALLOC(1, sizeof(struct switch_struct), + gf_switch_mt_switch_struct); + if (!switch_opt) { + GF_FREE(dup_str); + goto err; } - child_count = index; - switch_buf_array = GF_CALLOC ((index + 1), - sizeof (struct switch_sched_array), - gf_switch_mt_switch_sched_array); - if (!switch_buf_array) - goto err; - - trav_xl = this->children; - index = 0; - while (trav_xl) { - switch_buf_array[index].xl = trav_xl->xlator; - switch_buf_array[index].eligible = 1; - trav_xl = trav_xl->next; - index++; + pattern = strtok_r(dup_str, ":", &tmp_str1); + childs = strtok_r(NULL, ":", &tmp_str1); + if (strncmp(pattern, "*", 2) == 0) { + gf_msg("switch", GF_LOG_INFO, 0, DHT_MSG_SWITCH_PATTERN_INFO, + "'*' pattern will be taken by default " + "for all the unconfigured child nodes," + " hence neglecting current option"); + switch_str = strtok_r(NULL, ";", &tmp_str); + GF_FREE(switch_opt); + switch_opt = NULL; + GF_FREE(dup_str); + continue; } - - /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */ - - /* Get the pattern for considering switch case. - "option block-size *avi:10MB" etc */ - option_string = gf_strdup (pattern_str); - switch_str = strtok_r (option_string, ";", &tmp_str); - while (switch_str) { - dup_str = gf_strdup (switch_str); - switch_opt = GF_CALLOC (1, sizeof (struct switch_struct), - gf_switch_mt_switch_struct); - if (!switch_opt) { - GF_FREE (dup_str); - goto err; - } - - pattern = strtok_r (dup_str, ":", &tmp_str1); - childs = strtok_r (NULL, ":", &tmp_str1); - if (strncmp (pattern, "*", 2) == 0) { - gf_msg ("switch", GF_LOG_INFO, 0, - DHT_MSG_SWITCH_PATTERN_INFO, - "'*' pattern will be taken by default " - "for all the unconfigured child nodes," - " hence neglecting current option"); - switch_str = strtok_r (NULL, ";", &tmp_str); - GF_FREE (switch_opt); - switch_opt = NULL; - GF_FREE (dup_str); - continue; - } - GF_FREE (dup_str); - - pattern_length = strlen (pattern); - if (pattern_length >= (sizeof (switch_opt->path_pattern))) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SET_SWITCH_PATTERN_ERROR, - "Pattern (%s) too long", pattern); - goto err; - } - memcpy (switch_opt->path_pattern, pattern, pattern_length); - switch_opt->path_pattern[pattern_length] = '\0'; - - if (childs) { - dup_childs = gf_strdup (childs); - child = strtok_r (dup_childs, ",", &tmp); - while (child) { - if (gf_switch_valid_child (this, child)) { - idx++; - child = strtok_r (NULL, ",", &tmp); - } else { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SUBVOL_ERROR, - "%s is not a subvolume of %s. 
" - "pattern can only be scheduled " - "only to a subvolume of %s", - child, this->name, this->name); - GF_FREE (dup_childs); - goto err; - } - } - GF_FREE (dup_childs); - child = strtok_r (childs, ",", &tmp1); - switch_opt->num_child = idx; - switch_opt->array = GF_CALLOC (1, (idx * - sizeof (struct switch_sched_array)), - gf_switch_mt_switch_sched_array); - if (!switch_opt->array) - goto err; - idx = 0; - while (child) { - for (index = 0; index < child_count; index++) { - if (strcmp (switch_buf_array[index].xl->name, - child) == 0) { - gf_msg_debug ("switch", 0, - "'%s' pattern will be " - "scheduled to \"%s\"", - switch_opt->path_pattern, child); - /* - if (switch_buf_array[index-1].considered) { - gf_msg_debug ("switch", 0, - "ambiguity found, exiting"); - return -1; - } - */ - switch_opt->array[idx].xl = switch_buf_array[index].xl; - switch_buf_array[index].considered = 1; - idx++; - break; - } - } - child = strtok_r (NULL, ",", &tmp1); - } + GF_FREE(dup_str); + + pattern_length = strlen(pattern); + if (pattern_length >= (sizeof(switch_opt->path_pattern))) { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_SET_SWITCH_PATTERN_ERROR, "Pattern (%s) too long", + pattern); + goto err; + } + memcpy(switch_opt->path_pattern, pattern, pattern_length); + switch_opt->path_pattern[pattern_length] = '\0'; + + if (childs) { + dup_childs = gf_strdup(childs); + child = strtok_r(dup_childs, ",", &tmp); + while (child) { + if (gf_switch_valid_child(this, child)) { + idx++; + child = strtok_r(NULL, ",", &tmp); } else { - /* error */ - gf_msg ("switch", GF_LOG_ERROR, 0, - DHT_MSG_SET_SWITCH_PATTERN_ERROR, - "Check \"scheduler.switch.case\" " - "option in unify volume. Exiting"); - goto err; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR, + "%s is not a subvolume of %s. " + "pattern can only be scheduled " + "only to a subvolume of %s", + child, this->name, this->name); + GF_FREE(dup_childs); + goto err; } - - /* Link it to the main structure */ - if (switch_buf) { - /* there are already few entries */ - trav = switch_buf; - while (trav->next) - trav = trav->next; - trav->next = switch_opt; - } else { - /* First entry */ - switch_buf = switch_opt; + } + GF_FREE(dup_childs); + child = strtok_r(childs, ",", &tmp1); + switch_opt->num_child = idx; + switch_opt->array = GF_CALLOC( + 1, (idx * sizeof(struct switch_sched_array)), + gf_switch_mt_switch_sched_array); + if (!switch_opt->array) + goto err; + idx = 0; + while (child) { + for (index = 0; index < child_count; index++) { + if (strcmp(switch_buf_array[index].xl->name, child) == 0) { + gf_msg_debug("switch", 0, + "'%s' pattern will be " + "scheduled to \"%s\"", + switch_opt->path_pattern, child); + /* + if (switch_buf_array[index-1].considered) { + gf_msg_debug ("switch", 0, + "ambiguity found, exiting"); + return -1; + } + */ + switch_opt->array[idx].xl = switch_buf_array[index].xl; + switch_buf_array[index].considered = 1; + idx++; + break; + } } - switch_opt = NULL; - switch_str = strtok_r (NULL, ";", &tmp_str); + child = strtok_r(NULL, ",", &tmp1); + } + } else { + /* error */ + gf_msg("switch", GF_LOG_ERROR, 0, DHT_MSG_SET_SWITCH_PATTERN_ERROR, + "Check \"scheduler.switch.case\" " + "option in unify volume. 
Exiting"); + goto err; } - /* Now, all the pattern based considerations done, so for all the - * remaining pattern, '*' to all the remaining child nodes - */ - { - for (index=0; index < child_count; index++) { - /* check for considered flag */ - if (switch_buf_array[index].considered) - continue; - flag++; - } - if (!flag) { - gf_msg ("switch", GF_LOG_ERROR, 0, - DHT_MSG_SET_SWITCH_PATTERN_ERROR, - "No nodes left for pattern '*'. Exiting"); - goto err; - } - switch_opt = GF_CALLOC (1, sizeof (struct switch_struct), - gf_switch_mt_switch_struct); - if (!switch_opt) - goto err; - - /* Add the '*' pattern to the array */ - memcpy (switch_opt->path_pattern, "*", 2); - switch_opt->num_child = flag; - switch_opt->array = - GF_CALLOC (1, - flag * sizeof (struct switch_sched_array), - gf_switch_mt_switch_sched_array); - if (!switch_opt->array) - goto err; - flag = 0; - for (index=0; index < child_count; index++) { - /* check for considered flag */ - if (switch_buf_array[index].considered) - continue; - gf_msg_debug ("switch", 0, "'%s'" - " pattern will be scheduled to \"%s\"", - switch_opt->path_pattern, - switch_buf_array[index].xl->name); - - switch_opt->array[flag].xl = - switch_buf_array[index].xl; - switch_buf_array[index].considered = 1; - flag++; - } - if (switch_buf) { - /* there are already few entries */ - trav = switch_buf; - while (trav->next) - trav = trav->next; - trav->next = switch_opt; - } else { - /* First entry */ - switch_buf = switch_opt; - } - switch_opt = NULL; + /* Link it to the main structure */ + if (switch_buf) { + /* there are already few entries */ + trav = switch_buf; + while (trav->next) + trav = trav->next; + trav->next = switch_opt; + } else { + /* First entry */ + switch_buf = switch_opt; + } + switch_opt = NULL; + switch_str = strtok_r(NULL, ";", &tmp_str); + } + + /* Now, all the pattern based considerations done, so for all the + * remaining pattern, '*' to all the remaining child nodes + */ + { + for (index = 0; index < child_count; index++) { + /* check for considered flag */ + if (switch_buf_array[index].considered) + continue; + flag++; + } + if (!flag) { + gf_msg("switch", GF_LOG_ERROR, 0, DHT_MSG_SET_SWITCH_PATTERN_ERROR, + "No nodes left for pattern '*'. 
Exiting"); + goto err; + } + switch_opt = GF_CALLOC(1, sizeof(struct switch_struct), + gf_switch_mt_switch_struct); + if (!switch_opt) + goto err; + + /* Add the '*' pattern to the array */ + memcpy(switch_opt->path_pattern, "*", 2); + switch_opt->num_child = flag; + switch_opt->array = GF_CALLOC(1, + flag * sizeof(struct switch_sched_array), + gf_switch_mt_switch_sched_array); + if (!switch_opt->array) + goto err; + flag = 0; + for (index = 0; index < child_count; index++) { + /* check for considered flag */ + if (switch_buf_array[index].considered) + continue; + gf_msg_debug("switch", 0, + "'%s'" + " pattern will be scheduled to \"%s\"", + switch_opt->path_pattern, + switch_buf_array[index].xl->name); + + switch_opt->array[flag].xl = switch_buf_array[index].xl; + switch_buf_array[index].considered = 1; + flag++; + } + if (switch_buf) { + /* there are already few entries */ + trav = switch_buf; + while (trav->next) + trav = trav->next; + trav->next = switch_opt; + } else { + /* First entry */ + switch_buf = switch_opt; } - /* */ - conf->private = switch_buf; + switch_opt = NULL; + } + /* */ + conf->private = switch_buf; - GF_FREE (option_string); - return 0; + GF_FREE(option_string); + return 0; err: - GF_FREE (switch_buf_array); - GF_FREE (switch_opt); - GF_FREE (option_string); + GF_FREE(switch_buf_array); + GF_FREE(switch_opt); + GF_FREE(option_string); - if (switch_buf) { - trav = switch_buf; - while (trav) { - GF_FREE (trav->array); - switch_opt = trav; - trav = trav->next; - GF_FREE (switch_opt); - } + if (switch_buf) { + trav = switch_buf; + while (trav) { + GF_FREE(trav->array); + switch_opt = trav; + trav = trav->next; + GF_FREE(switch_opt); } - return -1; + } + return -1; } - int32_t -switch_init (xlator_t *this) +switch_init(xlator_t *this) { - dht_conf_t *conf = NULL; - data_t *data = NULL; - int ret = -1; + dht_conf_t *conf = NULL; + data_t *data = NULL; + int ret = -1; + + ret = dht_init(this); + if (ret) { + return ret; + } + conf = this->private; - ret = dht_init(this); + data = dict_get(this->options, "pattern.switch.case"); + if (data) { + /* TODO: */ + ret = set_switch_pattern(this, conf, data->data); if (ret) { - return ret; - } - conf = this->private; - - data = dict_get (this->options, "pattern.switch.case"); - if (data) { - /* TODO: */ - ret = set_switch_pattern (this, conf, data->data); - if (ret) { - goto err; - } + goto err; } + } - this->private = conf; - return 0; + this->private = conf; + return 0; err: - dht_fini(this); - return -1; + dht_fini(this); + return -1; } - -class_methods_t class_methods = { - .init = switch_init, - .fini = switch_fini, - .reconfigure = dht_reconfigure, - .notify = dht_notify -}; - +class_methods_t class_methods = {.init = switch_init, + .fini = switch_fini, + .reconfigure = dht_reconfigure, + .notify = dht_notify}; struct xlator_fops fops = { - .lookup = switch_lookup, - .create = switch_create, - .mknod = switch_mknod, - - .stat = dht_stat, - .fstat = dht_fstat, - .truncate = dht_truncate, - .ftruncate = dht_ftruncate, - .access = dht_access, - .readlink = dht_readlink, - .setxattr = dht_setxattr, - .getxattr = dht_getxattr, - .removexattr = dht_removexattr, - .open = dht_open, - .readv = dht_readv, - .writev = dht_writev, - .flush = dht_flush, - .fsync = dht_fsync, - .statfs = dht_statfs, - .lk = dht_lk, - .opendir = dht_opendir, - .readdir = dht_readdir, - .readdirp = dht_readdirp, - .fsyncdir = dht_fsyncdir, - .symlink = dht_symlink, - .unlink = dht_unlink, - .link = dht_link, - .mkdir = dht_mkdir, - .rmdir = dht_rmdir, - 
.rename = dht_rename, - .inodelk = dht_inodelk, - .finodelk = dht_finodelk, - .entrylk = dht_entrylk, - .fentrylk = dht_fentrylk, - .xattrop = dht_xattrop, - .fxattrop = dht_fxattrop, - .setattr = dht_setattr, + .lookup = switch_lookup, + .create = switch_create, + .mknod = switch_mknod, + + .stat = dht_stat, + .fstat = dht_fstat, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .access = dht_access, + .readlink = dht_readlink, + .setxattr = dht_setxattr, + .getxattr = dht_getxattr, + .removexattr = dht_removexattr, + .open = dht_open, + .readv = dht_readv, + .writev = dht_writev, + .flush = dht_flush, + .fsync = dht_fsync, + .statfs = dht_statfs, + .lk = dht_lk, + .opendir = dht_opendir, + .readdir = dht_readdir, + .readdirp = dht_readdirp, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = dht_unlink, + .link = dht_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, + .setattr = dht_setattr, }; - -struct xlator_cbks cbks = { - .forget = dht_forget -}; +struct xlator_cbks cbks = {.forget = dht_forget}; diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c index f08ff4768f0..b86ed673042 100644 --- a/xlators/cluster/dht/src/tier-common.c +++ b/xlators/cluster/dht/src/tier-common.c @@ -17,1233 +17,1183 @@ #include "tier.h" int -dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata); - +dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata); int -tier_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - loc_t *oldloc = NULL; - loc_t *newloc = NULL; + dht_local_t *local = NULL; + loc_t *oldloc = NULL; + loc_t *newloc = NULL; - local = frame->local; + local = frame->local; - oldloc = &local->loc; - newloc = &local->loc2; + oldloc = &local->loc; + newloc = &local->loc2; - if (op_ret == -1) { - /* No continuation on DHT inode missing errors, as we should - * then have a good stbuf that states P2 happened. We would - * get inode missing if, the file completed migrated between - * the lookup and the link call */ - goto out; - } + if (op_ret == -1) { + /* No continuation on DHT inode missing errors, as we should + * then have a good stbuf that states P2 happened. 
We would + * get inode missing if, the file completed migrated between + * the lookup and the link call */ + goto out; + } - if (local->call_cnt != 1) { - goto out; - } + if (local->call_cnt != 1) { + goto out; + } - local->call_cnt = 2; + local->call_cnt = 2; - /* Do this on the hot tier now */ + /* Do this on the hot tier now */ - STACK_WIND (frame, tier_link_cbk, local->cached_subvol, - local->cached_subvol->fops->link, - oldloc, newloc, xdata); + STACK_WIND(frame, tier_link_cbk, local->cached_subvol, + local->cached_subvol->fops->link, oldloc, newloc, xdata); - return 0; + return 0; out: - DHT_STRIP_PHASE1_FLAGS (stbuf); + DHT_STRIP_PHASE1_FLAGS(stbuf); - DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, - preparent, postparent, NULL); + DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, NULL); - return 0; + return 0; } - int -tier_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) +tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - int op_errno = -1; - int ret = -1; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (oldloc, err); - VALIDATE_OR_GOTO (newloc, err); - - conf = this->private; - - local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->call_cnt = 1; - - cached_subvol = local->cached_subvol; - - if (!cached_subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", oldloc->path); - op_errno = ENOENT; - goto err; - } - - hashed_subvol = TIER_HASHED_SUBVOL; - - ret = loc_copy (&local->loc2, newloc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - - if (hashed_subvol == cached_subvol) { - STACK_WIND (frame, dht_link_cbk, - cached_subvol, cached_subvol->fops->link, - oldloc, newloc, xdata); - return 0; - } - + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(oldloc, err); + VALIDATE_OR_GOTO(newloc, err); + + conf = this->private; + + local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->call_cnt = 1; + + cached_subvol = local->cached_subvol; + + if (!cached_subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + oldloc->path); + op_errno = ENOENT; + goto err; + } + + hashed_subvol = TIER_HASHED_SUBVOL; + + ret = loc_copy(&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + goto err; + } + + if (hashed_subvol == cached_subvol) { + STACK_WIND(frame, dht_link_cbk, cached_subvol, + cached_subvol->fops->link, oldloc, newloc, xdata); + return 0; + } - /* Create hardlinks to both the data file on the hot tier - and the linkto file on the cold tier */ + /* Create hardlinks to both the data file on the hot tier + and the linkto file on the cold tier */ - gf_uuid_copy (local->gfid, oldloc->inode->gfid); + gf_uuid_copy(local->gfid, oldloc->inode->gfid); - STACK_WIND (frame, tier_link_cbk, - hashed_subvol, hashed_subvol->fops->link, - oldloc, newloc, xdata); + STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link, + oldloc, newloc, xdata); - return 0; + return 0; err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; } - - int -tier_create_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + dht_local_t *local = NULL; - dht_local_t *local = NULL; + local = frame->local; - local = frame->local; + if (local->params) { + dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); + } - if (local->params) { - dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY); - } - - DHT_STACK_UNWIND (create, frame, -1, local->op_errno, - NULL, NULL, NULL, NULL, NULL, NULL); + DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); - return 0; + return 0; } int -tier_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - fd_t *fd, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - xlator_t *prev = NULL; - int ret = -1; - dht_local_t *local = NULL; - xlator_t *hashed_subvol = NULL; - dht_conf_t *conf = NULL; - - local = frame->local; - conf = this->private; - - hashed_subvol = TIER_HASHED_SUBVOL; - - if (!local) { - op_ret = -1; - op_errno = EINVAL; + xlator_t *prev = NULL; + int ret = -1; + dht_local_t *local = NULL; + xlator_t *hashed_subvol = NULL; + dht_conf_t *conf = NULL; + + local = frame->local; + conf = this->private; + + hashed_subvol = TIER_HASHED_SUBVOL; + + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + if (op_ret == -1) { + if (local->linked == _gf_true && local->xattr_req) { + local->op_errno = op_errno; + local->op_ret = op_ret; + ret = dht_fill_dict_to_avoid_unlink_of_migrating_file( + local->xattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value to " + "unlink of migrating file"); goto out; - } + } - if (op_ret == -1) { - if (local->linked == _gf_true && local->xattr_req) { - local->op_errno = op_errno; - local->op_ret = op_ret; - ret = dht_fill_dict_to_avoid_unlink_of_migrating_file - (local->xattr_req); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value to " - "unlink of migrating file"); - goto out; - } - - STACK_WIND (frame, - tier_create_unlink_stale_linkto_cbk, - hashed_subvol, - hashed_subvol->fops->unlink, - &local->loc, 0, local->xattr_req); - return 0; - } - goto out; + STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk, + hashed_subvol, hashed_subvol->fops->unlink, &local->loc, + 0, local->xattr_req); + return 0; } + goto out; + } - prev = cookie; + prev = cookie; - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - preparent, 0); + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); - dht_inode_ctx_time_update (local->loc.parent, this, - postparent, 1); - } + 
dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); + } - ret = dht_layout_preset (this, prev, inode); - if (ret != 0) { - gf_msg_debug (this->name, 0, - "could not set preset layout for subvol %s", - prev->name); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + ret = dht_layout_preset(this, prev, inode); + if (ret != 0) { + gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s", + prev->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } - local->op_errno = op_errno; + local->op_errno = op_errno; - if (local->linked == _gf_true) { - local->stbuf = *stbuf; - dht_linkfile_attr_heal (frame, this); - } + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal(frame, this); + } out: - if (local) { - if (local->xattr_req) { - dict_del (local->xattr_req, TIER_LINKFILE_GFID); - } + if (local) { + if (local->xattr_req) { + dict_del(local->xattr_req, TIER_LINKFILE_GFID); } + } - DHT_STRIP_PHASE1_FLAGS (stbuf); + DHT_STRIP_PHASE1_FLAGS(stbuf); - DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, - stbuf, preparent, postparent, xdata); + DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); - return 0; + return 0; } int -tier_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - unsigned char *gfid = NULL; - - local = frame->local; - if (!local) { - op_errno = EINVAL; - goto err; - } - - if (op_ret == -1) { - local->op_errno = op_errno; - goto err; - } - - conf = this->private; - if (!conf) { - local->op_errno = EINVAL; - op_errno = EINVAL; - goto err; - } - - cached_subvol = TIER_UNHASHED_SUBVOL; - - if (local->params) { - dict_del (local->params, conf->link_xattr_name); - dict_del (local->params, GLUSTERFS_INTERNAL_FOP_KEY); - } - - /* - * We will delete the linkfile if data file creation fails. - * When deleting this stale linkfile, there is a possibility - * for a race between this linkfile deletion and a stale - * linkfile deletion triggered by another lookup from different - * client. - * - * For eg: - * - * Client 1 Client 2 - * - * 1 linkfile created for foo - * - * 2 data file creation failed - * - * 3 creating a file with same name - * - * 4 lookup before creation deleted - * the linkfile created by client1 - * considering as a stale linkfile. - * - * 5 New linkfile created for foo - * with different gfid. - * - * 6 Trigger linkfile deletion as - * data file creation failed. - * - * 7 Linkfile deleted which is - * created by client2. - * - * 8 Data file created. - * - * With this race, we will end up having a file in a non-hashed subvol - * without a linkfile in hashed subvol. - * - * To avoid this, we store the gfid of linkfile created by client, So - * If we delete the linkfile , we validate gfid of existing file with - * stored value from posix layer. - * - * Storing this value in local->xattr_req as local->params was also used - * to create the data file. During the linkfile deletion we will use - * local->xattr_req dictionary. 
- */ + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + unsigned char *gfid = NULL; + + local = frame->local; + if (!local) { + op_errno = EINVAL; + goto err; + } + + if (op_ret == -1) { + local->op_errno = op_errno; + goto err; + } + + conf = this->private; + if (!conf) { + local->op_errno = EINVAL; + op_errno = EINVAL; + goto err; + } + + cached_subvol = TIER_UNHASHED_SUBVOL; + + if (local->params) { + dict_del(local->params, conf->link_xattr_name); + dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); + } + + /* + * We will delete the linkfile if data file creation fails. + * When deleting this stale linkfile, there is a possibility + * for a race between this linkfile deletion and a stale + * linkfile deletion triggered by another lookup from different + * client. + * + * For eg: + * + * Client 1 Client 2 + * + * 1 linkfile created for foo + * + * 2 data file creation failed + * + * 3 creating a file with same name + * + * 4 lookup before creation deleted + * the linkfile created by client1 + * considering as a stale linkfile. + * + * 5 New linkfile created for foo + * with different gfid. + * + * 6 Trigger linkfile deletion as + * data file creation failed. + * + * 7 Linkfile deleted which is + * created by client2. + * + * 8 Data file created. + * + * With this race, we will end up having a file in a non-hashed subvol + * without a linkfile in hashed subvol. + * + * To avoid this, we store the gfid of linkfile created by client, So + * If we delete the linkfile , we validate gfid of existing file with + * stored value from posix layer. + * + * Storing this value in local->xattr_req as local->params was also used + * to create the data file. During the linkfile deletion we will use + * local->xattr_req dictionary. 
+ */ + if (!local->xattr_req) { + local->xattr_req = dict_new(); if (!local->xattr_req) { - local->xattr_req = dict_new (); - if (!local->xattr_req) { - local->op_errno = ENOMEM; - op_errno = ENOMEM; - goto err; - } + local->op_errno = ENOMEM; + op_errno = ENOMEM; + goto err; } - - gfid = GF_MALLOC (sizeof (uuid_t), gf_common_mt_char); - if (!gfid) { - local->op_errno = ENOMEM; - op_errno = ENOMEM; - goto err; - } - - gf_uuid_copy (gfid, stbuf->ia_gfid); - ret = dict_set_dynptr (local->xattr_req, TIER_LINKFILE_GFID, - gfid, sizeof (uuid_t)); - if (ret) { - GF_FREE (gfid); - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value" - " : key = %s", TIER_LINKFILE_GFID); - } - - STACK_WIND_COOKIE (frame, tier_create_cbk, cached_subvol, - cached_subvol, cached_subvol->fops->create, - &local->loc, local->flags, local->mode, - local->umask, local->fd, local->params); - - return 0; + } + + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); + if (!gfid) { + local->op_errno = ENOMEM; + op_errno = ENOMEM; + goto err; + } + + gf_uuid_copy(gfid, stbuf->ia_gfid); + ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid, + sizeof(uuid_t)); + if (ret) { + GF_FREE(gfid); + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value" + " : key = %s", + TIER_LINKFILE_GFID); + } + + STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol, + cached_subvol->fops->create, &local->loc, local->flags, + local->mode, local->umask, local->fd, local->params); + + return 0; err: - DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; } gf_boolean_t -tier_is_hot_tier_decommissioned (xlator_t *this) +tier_is_hot_tier_decommissioned(xlator_t *this) { - dht_conf_t *conf = NULL; - xlator_t *hot_tier = NULL; - int i = 0; - - conf = this->private; - hot_tier = conf->subvolumes[1]; - - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] && - conf->decommissioned_bricks[i] == hot_tier) - return _gf_true; - } + dht_conf_t *conf = NULL; + xlator_t *hot_tier = NULL; + int i = 0; + + conf = this->private; + hot_tier = conf->subvolumes[1]; + + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] && + conf->decommissioned_bricks[i] == hot_tier) + return _gf_true; } + } - return _gf_false; + return _gf_false; } int -tier_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *params) +tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { - int op_errno = -1; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - xlator_t *hot_subvol = NULL; - xlator_t *cold_subvol = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - conf = this->private; - - dht_get_du_info (frame, this, loc); - - local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - - cold_subvol = TIER_HASHED_SUBVOL; - hot_subvol = TIER_UNHASHED_SUBVOL; - - if (conf->subvolumes[0] != cold_subvol) { - hot_subvol = conf->subvolumes[0]; - } - /* - * if hot tier full, write to cold. 
- * Also if hot tier is full, create in cold - */ - if (dht_is_subvol_filled (this, hot_subvol) || - tier_is_hot_tier_decommissioned (this)) { - gf_msg_debug (this->name, 0, - "creating %s on %s", loc->path, - cold_subvol->name); - - STACK_WIND_COOKIE (frame, tier_create_cbk, cold_subvol, - cold_subvol, cold_subvol->fops->create, - loc, flags, mode, umask, fd, params); - } else { - local->params = dict_ref (params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - local->cached_subvol = hot_subvol; - local->hashed_subvol = cold_subvol; - - gf_msg_debug (this->name, 0, - "creating %s on %s (link at %s)", loc->path, - hot_subvol->name, cold_subvol->name); - - dht_linkfile_create (frame, tier_create_linkfile_create_cbk, - this, hot_subvol, cold_subvol, loc); - - goto out; - } + int op_errno = -1; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *hot_subvol = NULL; + xlator_t *cold_subvol = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + conf = this->private; + + dht_get_du_info(frame, this, loc); + + local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + cold_subvol = TIER_HASHED_SUBVOL; + hot_subvol = TIER_UNHASHED_SUBVOL; + + if (conf->subvolumes[0] != cold_subvol) { + hot_subvol = conf->subvolumes[0]; + } + /* + * if hot tier full, write to cold. + * Also if hot tier is full, create in cold + */ + if (dht_is_subvol_filled(this, hot_subvol) || + tier_is_hot_tier_decommissioned(this)) { + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + cold_subvol->name); + + STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol, + cold_subvol->fops->create, loc, flags, mode, umask, + fd, params); + } else { + local->params = dict_ref(params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + local->cached_subvol = hot_subvol; + local->hashed_subvol = cold_subvol; + + gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path, + hot_subvol->name, cold_subvol->name); + + dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this, + hot_subvol, cold_subvol, loc); + + goto out; + } out: - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } int -tier_unlink_nonhashed_linkfile_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if ((op_ret == -1) && (op_errno != ENOENT)) { - local->op_errno = op_errno; - local->op_ret = op_ret; - gf_msg_debug (this->name, op_errno, - "Unlink link: subvolume %s" - " returned -1", - prev->name); - goto unlock; - } - - local->op_ret = 0; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + + local = frame->local; + prev = cookie; + + LOCK(&frame->lock); + { + if ((op_ret == -1) && (op_errno != ENOENT)) { + local->op_errno = op_errno; + local->op_ret = op_ret; + gf_msg_debug(this->name, op_errno, + "Unlink link: subvolume %s" + " returned -1", + prev->name); + goto unlock; } + + local->op_ret = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - if (local->op_ret == -1) - goto err; - DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, NULL); + if (local->op_ret == -1) + goto err; + DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, NULL); - - return 0; + return 0; err: - DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno, - NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); + return 0; } int -tier_unlink_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *preparent, dict_t *xdata, - struct iatt *postparent) +tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *preparent, dict_t *xdata, + struct iatt *postparent) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - dht_conf_t *conf = NULL; - xlator_t *hot_subvol = NULL; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + dht_conf_t *conf = NULL; + xlator_t *hot_subvol = NULL; - local = frame->local; - prev = cookie; - conf = this->private; - hot_subvol = TIER_UNHASHED_SUBVOL; + local = frame->local; + prev = cookie; + conf = this->private; + hot_subvol = TIER_UNHASHED_SUBVOL; - if (!op_ret) { - /* - * linkfile present on hot tier. unlinking the linkfile - */ - STACK_WIND_COOKIE (frame, tier_unlink_nonhashed_linkfile_cbk, - hot_subvol, hot_subvol, hot_subvol->fops->unlink, - &local->loc, local->flags, NULL); - return 0; - } + if (!op_ret) { + /* + * linkfile present on hot tier. 
unlinking the linkfile + */ + STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol, + hot_subvol, hot_subvol->fops->unlink, &local->loc, + local->flags, NULL); + return 0; + } - LOCK (&frame->lock); - { - if (op_errno == ENOENT) { - local->op_ret = 0; - local->op_errno = op_errno; - } else { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - gf_msg_debug (this->name, op_errno, - "Lookup : subvolume %s returned -1", - prev->name); + LOCK(&frame->lock); + { + if (op_errno == ENOENT) { + local->op_ret = 0; + local->op_errno = op_errno; + } else { + local->op_ret = op_ret; + local->op_errno = op_errno; } + gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1", + prev->name); + } - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); + DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); - return 0; + return 0; } int -tier_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - /* Ignore EINVAL for tier to ignore error when the file - does not exist on the other tier */ - if ((op_ret == -1) && !((op_errno == ENOENT) || - (op_errno == EINVAL))) { - local->op_errno = op_errno; - local->op_ret = op_ret; - gf_msg_debug (this->name, op_errno, - "Unlink link: subvolume %s" - " returned -1", - prev->name); - goto unlock; - } - - local->op_ret = 0; + dht_local_t *local = NULL; + xlator_t *prev = NULL; + + local = frame->local; + prev = cookie; + + LOCK(&frame->lock); + { + /* Ignore EINVAL for tier to ignore error when the file + does not exist on the other tier */ + if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) { + local->op_errno = op_errno; + local->op_ret = op_ret; + gf_msg_debug(this->name, op_errno, + "Unlink link: subvolume %s" + " returned -1", + prev->name); + goto unlock; } + + local->op_ret = 0; + } unlock: - UNLOCK (&frame->lock); + UNLOCK(&frame->lock); - if (local->op_ret == -1) - goto err; + if (local->op_ret == -1) + goto err; - DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); + DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); - return 0; + return 0; err: - DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno, - NULL, NULL, NULL); - return 0; + DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); + return 0; } int32_t -tier_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - dht_local_t *local = NULL; - xlator_t *prev = NULL; - struct iatt *stbuf = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - xlator_t *hot_tier = NULL; - xlator_t *cold_tier = NULL; - - local = frame->local; - prev = cookie; - conf = this->private; - - cold_tier = TIER_HASHED_SUBVOL; - hot_tier = TIER_UNHASHED_SUBVOL; - - LOCK (&frame->lock); - { - if (op_ret 
== -1) { - if (op_errno == ENOENT) { - local->op_ret = 0; - } else { - local->op_ret = -1; - local->op_errno = op_errno; - } - gf_msg_debug (this->name, op_errno, - "Unlink: subvolume %s returned -1" - " with errno = %d", - prev->name, op_errno); - goto unlock; - } - + dht_local_t *local = NULL; + xlator_t *prev = NULL; + struct iatt *stbuf = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + xlator_t *hot_tier = NULL; + xlator_t *cold_tier = NULL; + + local = frame->local; + prev = cookie; + conf = this->private; + + cold_tier = TIER_HASHED_SUBVOL; + hot_tier = TIER_UNHASHED_SUBVOL; + + LOCK(&frame->lock); + { + if (op_ret == -1) { + if (op_errno == ENOENT) { local->op_ret = 0; + } else { + local->op_ret = -1; + local->op_errno = op_errno; + } + gf_msg_debug(this->name, op_errno, + "Unlink: subvolume %s returned -1" + " with errno = %d", + prev->name, op_errno); + goto unlock; + } - local->postparent = *postparent; - local->preparent = *preparent; + local->op_ret = 0; - if (local->loc.parent) { - dht_inode_ctx_time_update (local->loc.parent, this, - &local->preparent, 0); - dht_inode_ctx_time_update (local->loc.parent, this, - &local->postparent, 1); - } + local->postparent = *postparent; + local->preparent = *preparent; + + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->preparent, 0); + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); } + } unlock: - UNLOCK (&frame->lock); - - if (local->op_ret) - goto out; + UNLOCK(&frame->lock); - if (cold_tier != local->cached_subvol) { - /* - * File is present in hot tier, so there will be - * a link file on cold tier, deleting the linkfile - * from cold tier - */ - STACK_WIND_COOKIE (frame, tier_unlink_linkfile_cbk, cold_tier, - cold_tier, cold_tier->fops->unlink, - &local->loc, - local->flags, xdata); - return 0; - } + if (local->op_ret) + goto out; - ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, (void **) &stbuf); - if (!ret && stbuf && ((IS_DHT_MIGRATION_PHASE2 (stbuf)) || - IS_DHT_MIGRATION_PHASE1 (stbuf))) { - /* - * File is migrating from cold to hot tier. - * Delete the destination linkfile. - */ - STACK_WIND_COOKIE (frame, tier_unlink_lookup_cbk, hot_tier, - hot_tier, hot_tier->fops->lookup, - &local->loc, NULL); - return 0; + if (cold_tier != local->cached_subvol) { + /* + * File is present in hot tier, so there will be + * a link file on cold tier, deleting the linkfile + * from cold tier + */ + STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier, + cold_tier->fops->unlink, &local->loc, local->flags, + xdata); + return 0; + } - } + ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); + if (!ret && stbuf && + ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) { + /* + * File is migrating from cold to hot tier. + * Delete the destination linkfile. 
+ */ + STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier, + hot_tier->fops->lookup, &local->loc, NULL); + return 0; + } out: - DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); + DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); - return 0; + return 0; } int -tier_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - int ret = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - conf = this->private; - - local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - hashed_subvol = TIER_HASHED_SUBVOL; - - cached_subvol = local->cached_subvol; - if (!cached_subvol) { - gf_msg_debug (this->name, 0, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local->flags = xflag; - if (IA_ISREG (loc->inode->ia_type) && - (hashed_subvol == cached_subvol)) { - /* - * File resides in cold tier. We need to stat - * the file to see if it is being promoted. - * If yes we need to delete the destination - * file as well. - * - * Currently we are doing this check only for - * regular files. - */ - xdata = xdata ? dict_ref (xdata) : dict_new (); - if (xdata) { - ret = dict_set_int8 (xdata, DHT_IATT_IN_XDATA_KEY, 1); - if (ret) { - gf_msg_debug (this->name, 0, - "Failed to set dictionary key %s", - DHT_IATT_IN_XDATA_KEY); - } - } - } - + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + int ret = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + conf = this->private; + + local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK); + if (!local) { + op_errno = ENOMEM; + + goto err; + } + + hashed_subvol = TIER_HASHED_SUBVOL; + + cached_subvol = local->cached_subvol; + if (!cached_subvol) { + gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + local->flags = xflag; + if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) { /* - * File is on hot tier, delete the data file first, then - * linkfile from cold. + * File resides in cold tier. We need to stat + * the file to see if it is being promoted. + * If yes we need to delete the destination + * file as well. + * + * Currently we are doing this check only for + * regular files. */ - STACK_WIND_COOKIE (frame, tier_unlink_cbk, cached_subvol, - cached_subvol, cached_subvol->fops->unlink, loc, - xflag, xdata); - if (xdata) - dict_unref (xdata); - return 0; + xdata = xdata ? dict_ref(xdata) : dict_new(); + if (xdata) { + ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to set dictionary key %s", + DHT_IATT_IN_XDATA_KEY); + } + } + } + + /* + * File is on hot tier, delete the data file first, then + * linkfile from cold. + */ + STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol, + cached_subvol->fops->unlink, loc, xflag, xdata); + if (xdata) + dict_unref(xdata); + return 0; err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int -tier_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *orig_entries, - dict_t *xdata) +tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - int count = 0; - - INIT_LIST_HEAD (&entries.list); - - if (op_ret < 0) - goto unwind; - - list_for_each_entry (orig_entry, (&orig_entries->list), list) { - entry = gf_dirent_for_name (orig_entry->d_name); - if (!entry) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - "Memory allocation failed "); - goto unwind; - } + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + int count = 0; + + INIT_LIST_HEAD(&entries.list); + + if (op_ret < 0) + goto unwind; + + list_for_each_entry(orig_entry, (&orig_entries->list), list) + { + entry = gf_dirent_for_name(orig_entry->d_name); + if (!entry) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "Memory allocation failed "); + goto unwind; + } - entry->d_off = orig_entry->d_off; - entry->d_ino = orig_entry->d_ino; - entry->d_type = orig_entry->d_type; - entry->d_len = orig_entry->d_len; + entry->d_off = orig_entry->d_off; + entry->d_ino = orig_entry->d_ino; + entry->d_type = orig_entry->d_type; + entry->d_len = orig_entry->d_len; - list_add_tail (&entry->list, &entries.list); - count++; - } - op_ret = count; + list_add_tail(&entry->list, &entries.list); + count++; + } + op_ret = count; unwind: - if (op_ret < 0) - op_ret = 0; + if (op_ret < 0) + op_ret = 0; - DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL); + DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } int -tier_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, +tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) { - dht_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - xlator_t *prev = NULL; - xlator_t *next_subvol = NULL; - off_t next_offset = 0; - int count = 0; - dht_conf_t *conf = NULL; - int ret = 0; - inode_table_t *itable = NULL; - inode_t *inode = NULL; - - INIT_LIST_HEAD (&entries.list); - prev = cookie; - local = frame->local; - itable = local->fd ? local->fd->inode->table : NULL; - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, conf, unwind); - - if (op_ret < 0) - goto done; - - list_for_each_entry (orig_entry, (&orig_entries->list), list) { - next_offset = orig_entry->d_off; - - if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { - /*stat failed somewhere- ignore this entry*/ - continue; - } + dht_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + xlator_t *prev = NULL; + xlator_t *next_subvol = NULL; + off_t next_offset = 0; + int count = 0; + dht_conf_t *conf = NULL; + int ret = 0; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + + INIT_LIST_HEAD(&entries.list); + prev = cookie; + local = frame->local; + itable = local->fd ? 
local->fd->inode->table : NULL; + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, unwind); + + if (op_ret < 0) + goto done; + + list_for_each_entry(orig_entry, (&orig_entries->list), list) + { + next_offset = orig_entry->d_off; + + if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { + /*stat failed somewhere- ignore this entry*/ + continue; + } - entry = gf_dirent_for_name (orig_entry->d_name); - if (!entry) { + entry = gf_dirent_for_name(orig_entry->d_name); + if (!entry) { + goto unwind; + } - goto unwind; - } + entry->d_off = orig_entry->d_off; + entry->d_stat = orig_entry->d_stat; + entry->d_ino = orig_entry->d_ino; + entry->d_type = orig_entry->d_type; + entry->d_len = orig_entry->d_len; - entry->d_off = orig_entry->d_off; - entry->d_stat = orig_entry->d_stat; - entry->d_ino = orig_entry->d_ino; - entry->d_type = orig_entry->d_type; - entry->d_len = orig_entry->d_len; - - if (orig_entry->dict) - entry->dict = dict_ref (orig_entry->dict); - - if (check_is_linkfile (NULL, (&orig_entry->d_stat), - orig_entry->dict, - conf->link_xattr_name)) { - goto entries; - - } else if (IA_ISDIR(entry->d_stat.ia_type)) { - if (orig_entry->inode) { - dht_inode_ctx_time_update (orig_entry->inode, - this, &entry->d_stat, - 1); - } - } else { - if (orig_entry->inode) { - ret = dht_layout_preset (this, prev, - orig_entry->inode); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to link the layout " - "in inode"); - - entry->inode = inode_ref (orig_entry->inode); - } else if (itable) { - /* - * orig_entry->inode might be null if any upper - * layer xlators below client set to null, to - * force a lookup on the inode even if the inode - * is present in the inode table. In that case - * we just update the ctx to make sure we didn't - * missed anything. - */ - inode = inode_find (itable, - orig_entry->d_stat.ia_gfid); - if (inode) { - ret = dht_layout_preset - (this, TIER_HASHED_SUBVOL, - inode); - if (ret) - gf_msg (this->name, - GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to link the layout" - " in inode"); - inode_unref (inode); - inode = NULL; - } - } - } + if (orig_entry->dict) + entry->dict = dict_ref(orig_entry->dict); -entries: - list_add_tail (&entry->list, &entries.list); - count++; - } - op_ret = count; + if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict, + conf->link_xattr_name)) { + goto entries; -done: - if (count == 0) { - /* non-zero next_offset means that - EOF is not yet hit on the current subvol - */ - if (next_offset != 0) { - next_subvol = prev; - } else { - goto unwind; + } else if (IA_ISDIR(entry->d_stat.ia_type)) { + if (orig_entry->inode) { + dht_inode_ctx_time_update(orig_entry->inode, this, + &entry->d_stat, 1); + } + } else { + if (orig_entry->inode) { + ret = dht_layout_preset(this, prev, orig_entry->inode); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_LAYOUT_SET_FAILED, + "failed to link the layout " + "in inode"); + + entry->inode = inode_ref(orig_entry->inode); + } else if (itable) { + /* + * orig_entry->inode might be null if any upper + * layer xlators below client set to null, to + * force a lookup on the inode even if the inode + * is present in the inode table. In that case + * we just update the ctx to make sure we didn't + * missed anything. 
+ */ + inode = inode_find(itable, orig_entry->d_stat.ia_gfid); + if (inode) { + ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_LAYOUT_SET_FAILED, + "failed to link the layout" + " in inode"); + inode_unref(inode); + inode = NULL; } + } + } + + entries: + list_add_tail(&entry->list, &entries.list); + count++; + } + op_ret = count; - STACK_WIND_COOKIE (frame, tier_readdirp_cbk, next_subvol, - next_subvol, next_subvol->fops->readdirp, - local->fd, local->size, next_offset, - local->xattr); - return 0; +done: + if (count == 0) { + /* non-zero next_offset means that + EOF is not yet hit on the current subvol + */ + if (next_offset != 0) { + next_subvol = prev; + } else { + goto unwind; } + STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol, + next_subvol->fops->readdirp, local->fd, local->size, + next_offset, local->xattr); + return 0; + } + unwind: - if (op_ret < 0) - op_ret = 0; + if (op_ret < 0) + op_ret = 0; - DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL); + DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } int -tier_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t yoff, int whichop, dict_t *dict) { - dht_local_t *local = NULL; - int op_errno = -1; - xlator_t *hashed_subvol = NULL; - int ret = 0; - dht_conf_t *conf = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (this->private, err); - - conf = this->private; - - local = dht_local_init (frame, NULL, NULL, whichop); - if (!local) { - op_errno = ENOMEM; - goto err; + dht_local_t *local = NULL; + int op_errno = -1; + xlator_t *hashed_subvol = NULL; + int ret = 0; + dht_conf_t *conf = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + local = dht_local_init(frame, NULL, NULL, whichop); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->fd = fd_ref(fd); + local->size = size; + local->xattr_req = (dict) ? dict_ref(dict) : NULL; + + hashed_subvol = TIER_HASHED_SUBVOL; + + /* TODO: do proper readdir */ + if (whichop == GF_FOP_READDIRP) { + if (dict) + local->xattr = dict_ref(dict); + else + local->xattr = dict_new(); + + if (local->xattr) { + ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value" + " : key = %s", + conf->link_xattr_name); } - local->fd = fd_ref (fd); - local->size = size; - local->xattr_req = (dict) ? 
dict_ref (dict) : NULL; - - hashed_subvol = TIER_HASHED_SUBVOL; + STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol, + hashed_subvol, hashed_subvol->fops->readdirp, fd, + size, yoff, local->xattr); + } else { + STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol, + hashed_subvol->fops->readdir, fd, size, yoff, + local->xattr); + } - /* TODO: do proper readdir */ - if (whichop == GF_FOP_READDIRP) { - if (dict) - local->xattr = dict_ref (dict); - else - local->xattr = dict_new (); - - if (local->xattr) { - ret = dict_set_uint32 (local->xattr, - conf->link_xattr_name, 256); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value" - " : key = %s", - conf->link_xattr_name); - - } - - STACK_WIND_COOKIE (frame, tier_readdirp_cbk, hashed_subvol, - hashed_subvol, hashed_subvol->fops->readdirp, - fd, size, yoff, local->xattr); - - } else { - STACK_WIND_COOKIE (frame, tier_readdir_cbk, hashed_subvol, - hashed_subvol, hashed_subvol->fops->readdir, - fd, size, yoff, local->xattr); - } - - return 0; + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int -tier_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t yoff, dict_t *xdata) { - int op = GF_FOP_READDIR; - dht_conf_t *conf = NULL; - int i = 0; - - conf = this->private; - if (!conf) - goto out; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->subvolume_status[i]) { - op = GF_FOP_READDIRP; - break; - } + int op = GF_FOP_READDIR; + dht_conf_t *conf = NULL; + int i = 0; + + conf = this->private; + if (!conf) + goto out; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) { + op = GF_FOP_READDIRP; + break; } + } - if (conf->use_readdirp) - op = GF_FOP_READDIRP; + if (conf->use_readdirp) + op = GF_FOP_READDIRP; out: - tier_do_readdir (frame, this, fd, size, yoff, op, 0); - return 0; + tier_do_readdir(frame, this, fd, size, yoff, op, 0); + return 0; } int -tier_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, dict_t *dict) +tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t yoff, dict_t *dict) { - tier_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); - return 0; + tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); + return 0; } int -tier_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct statvfs *statvfs, - dict_t *xdata) +tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct statvfs *statvfs, dict_t *xdata) { - gf_boolean_t event = _gf_false; - qdstatfs_action_t action = qdstatfs_action_OFF; - dht_local_t *local = NULL; - int this_call_cnt = 0; - int bsize = 0; - int frsize = 0; - GF_UNUSED int ret = 0; - unsigned long new_usage = 0; - unsigned long cur_usage = 0; - xlator_t *prev = NULL; - dht_conf_t *conf = NULL; - tier_statvfs_t *tier_stat = NULL; - - prev = cookie; - local = frame->local; - GF_ASSERT (local); - - conf = this->private; - - if (xdata) - ret = dict_get_int8 (xdata, "quota-deem-statfs", - (int8_t *)&event); - - tier_stat = &local->tier_statvfs; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = 
op_errno; - goto unlock; - } - if (!statvfs) { - op_errno = EINVAL; - local->op_ret = -1; - goto unlock; - } - local->op_ret = 0; + gf_boolean_t event = _gf_false; + qdstatfs_action_t action = qdstatfs_action_OFF; + dht_local_t *local = NULL; + int this_call_cnt = 0; + int bsize = 0; + int frsize = 0; + GF_UNUSED int ret = 0; + unsigned long new_usage = 0; + unsigned long cur_usage = 0; + xlator_t *prev = NULL; + dht_conf_t *conf = NULL; + tier_statvfs_t *tier_stat = NULL; + + prev = cookie; + local = frame->local; + GF_ASSERT(local); + + conf = this->private; + + if (xdata) + ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event); + + tier_stat = &local->tier_statvfs; + + LOCK(&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + if (!statvfs) { + op_errno = EINVAL; + local->op_ret = -1; + goto unlock; + } + local->op_ret = 0; + + if (local->quota_deem_statfs) { + if (event == _gf_true) { + action = qdstatfs_action_COMPARE; + } else { + action = qdstatfs_action_NEGLECT; + } + } else { + if (event == _gf_true) { + action = qdstatfs_action_REPLACE; + local->quota_deem_statfs = _gf_true; + } + } - if (local->quota_deem_statfs) { - if (event == _gf_true) { - action = qdstatfs_action_COMPARE; - } else { - action = qdstatfs_action_NEGLECT; - } - } else { - - if (event == _gf_true) { - action = qdstatfs_action_REPLACE; - local->quota_deem_statfs = _gf_true; - } - } + if (local->quota_deem_statfs) { + switch (action) { + case qdstatfs_action_NEGLECT: + goto unlock; - if (local->quota_deem_statfs) { - switch (action) { - case qdstatfs_action_NEGLECT: - goto unlock; - - case qdstatfs_action_REPLACE: - local->statvfs = *statvfs; - goto unlock; - - case qdstatfs_action_COMPARE: - new_usage = statvfs->f_blocks - - statvfs->f_bfree; - cur_usage = local->statvfs.f_blocks - - local->statvfs.f_bfree; - - /* Take the max of the usage from subvols */ - if (new_usage >= cur_usage) - local->statvfs = *statvfs; - goto unlock; - - default: - break; - } - } + case qdstatfs_action_REPLACE: + local->statvfs = *statvfs; + goto unlock; - if (local->statvfs.f_bsize != 0) { - bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); - frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); - dht_normalize_stats(&local->statvfs, bsize, frsize); - dht_normalize_stats(statvfs, bsize, frsize); - } else { - local->statvfs.f_bsize = statvfs->f_bsize; - local->statvfs.f_frsize = statvfs->f_frsize; - } + case qdstatfs_action_COMPARE: + new_usage = statvfs->f_blocks - statvfs->f_bfree; + cur_usage = local->statvfs.f_blocks - + local->statvfs.f_bfree; - if (prev == TIER_HASHED_SUBVOL) { - local->statvfs.f_blocks = statvfs->f_blocks; - local->statvfs.f_files = statvfs->f_files; - local->statvfs.f_fsid = statvfs->f_fsid; - local->statvfs.f_flag = statvfs->f_flag; - local->statvfs.f_namemax = statvfs->f_namemax; - tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree); - tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail); - tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree); - tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail); - tier_stat->hashed_fsid = statvfs->f_fsid; - } else { - tier_stat->unhashed_fsid = statvfs->f_fsid; - tier_stat->unhashed_blocks_used = (statvfs->f_blocks - statvfs->f_bfree); - tier_stat->unhashed_pblocks_used = (statvfs->f_blocks - statvfs->f_bavail); - tier_stat->unhashed_files_used = (statvfs->f_files - statvfs->f_ffree); - tier_stat->unhashed_pfiles_used = (statvfs->f_files - statvfs->f_favail); - } + /* 
Take the max of the usage from subvols */ + if (new_usage >= cur_usage) + local->statvfs = *statvfs; + goto unlock; + default: + break; + } } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) { - tier_stat->blocks_used += tier_stat->unhashed_blocks_used; - tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used; - tier_stat->files_used += tier_stat->unhashed_files_used; - tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used; - } - local->statvfs.f_bfree = local->statvfs.f_blocks - - tier_stat->blocks_used; - local->statvfs.f_bavail = local->statvfs.f_blocks - - tier_stat->pblocks_used; - local->statvfs.f_ffree = local->statvfs.f_files - - tier_stat->files_used; - local->statvfs.f_favail = local->statvfs.f_files - - tier_stat->pfiles_used; - DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno, - &local->statvfs, xdata); + + if (local->statvfs.f_bsize != 0) { + bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); + frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); + dht_normalize_stats(&local->statvfs, bsize, frsize); + dht_normalize_stats(statvfs, bsize, frsize); + } else { + local->statvfs.f_bsize = statvfs->f_bsize; + local->statvfs.f_frsize = statvfs->f_frsize; } - return 0; + if (prev == TIER_HASHED_SUBVOL) { + local->statvfs.f_blocks = statvfs->f_blocks; + local->statvfs.f_files = statvfs->f_files; + local->statvfs.f_fsid = statvfs->f_fsid; + local->statvfs.f_flag = statvfs->f_flag; + local->statvfs.f_namemax = statvfs->f_namemax; + tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree); + tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail); + tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree); + tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail); + tier_stat->hashed_fsid = statvfs->f_fsid; + } else { + tier_stat->unhashed_fsid = statvfs->f_fsid; + tier_stat->unhashed_blocks_used = (statvfs->f_blocks - + statvfs->f_bfree); + tier_stat->unhashed_pblocks_used = (statvfs->f_blocks - + statvfs->f_bavail); + tier_stat->unhashed_files_used = (statvfs->f_files - + statvfs->f_ffree); + tier_stat->unhashed_pfiles_used = (statvfs->f_files - + statvfs->f_favail); + } + } +unlock: + UNLOCK(&frame->lock); + + this_call_cnt = dht_frame_return(frame); + if (is_last_call(this_call_cnt)) { + if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) { + tier_stat->blocks_used += tier_stat->unhashed_blocks_used; + tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used; + tier_stat->files_used += tier_stat->unhashed_files_used; + tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used; + } + local->statvfs.f_bfree = local->statvfs.f_blocks - + tier_stat->blocks_used; + local->statvfs.f_bavail = local->statvfs.f_blocks - + tier_stat->pblocks_used; + local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used; + local->statvfs.f_favail = local->statvfs.f_files - + tier_stat->pfiles_used; + DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, + &local->statvfs, xdata); + } + + return 0; } - int -tier_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - int i = -1; - inode_t *inode = NULL; - inode_table_t *itable = NULL; - uuid_t root_gfid = {0, }; - loc_t newloc = {0, }; - - VALIDATE_OR_GOTO (frame, err); - 
VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (this->private, err); - - conf = this->private; - - local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS); - if (!local) { - op_errno = ENOMEM; - goto err; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + inode_t *inode = NULL; + inode_table_t *itable = NULL; + uuid_t root_gfid = { + 0, + }; + loc_t newloc = { + 0, + }; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(this->private, err); + + conf = this->private; + + local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) { + itable = loc->inode->table; + if (!itable) { + op_errno = EINVAL; + goto err; } - if (loc->inode && !IA_ISDIR (loc->inode->ia_type)) { - itable = loc->inode->table; - if (!itable) { - op_errno = EINVAL; - goto err; - } - - loc = &local->loc2; - root_gfid[15] = 1; + loc = &local->loc2; + root_gfid[15] = 1; - inode = inode_find (itable, root_gfid); - if (!inode) { - op_errno = EINVAL; - goto err; - } - - dht_build_root_loc (inode, &newloc); - loc = &newloc; + inode = inode_find(itable, root_gfid); + if (!inode) { + op_errno = EINVAL; + goto err; } - local->call_cnt = conf->subvolume_cnt; + dht_build_root_loc(inode, &newloc); + loc = &newloc; + } - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND_COOKIE (frame, tier_statfs_cbk, conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->statfs, loc, - xdata); - } + local->call_cnt = conf->subvolume_cnt; - return 0; + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i], + conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, loc, xdata); + } + + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 63eb65be302..f7fd6ef22e2 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -38,103 +38,101 @@ static gfdb_methods_t gfdb_methods; * Closes all the fds and frees the qfile_array * */ static void -qfile_array_free (tier_qfile_array_t *qfile_array) +qfile_array_free(tier_qfile_array_t *qfile_array) { - ssize_t i = 0; - - if (qfile_array) { - if (qfile_array->fd_array) { - for (i = 0; i < qfile_array->array_size; i++) { - if (qfile_array->fd_array[i] != -1) { - sys_close (qfile_array->fd_array[i]); - } - } + ssize_t i = 0; + + if (qfile_array) { + if (qfile_array->fd_array) { + for (i = 0; i < qfile_array->array_size; i++) { + if (qfile_array->fd_array[i] != -1) { + sys_close(qfile_array->fd_array[i]); } - GF_FREE (qfile_array->fd_array); + } } - GF_FREE (qfile_array); + GF_FREE(qfile_array->fd_array); + } + GF_FREE(qfile_array); } - /* Create a new query file list with given size */ static tier_qfile_array_t * -qfile_array_new (ssize_t array_size) +qfile_array_new(ssize_t array_size) { - int ret = -1; - tier_qfile_array_t *qfile_array = NULL; - ssize_t i = 0; - - GF_VALIDATE_OR_GOTO ("tier", (array_size > 0), out); - - qfile_array = GF_CALLOC (1, sizeof (tier_qfile_array_t), - gf_tier_mt_qfile_array_t); - if (!qfile_array) { - gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to allocate memory for tier_qfile_array_t"); - goto out; - } - - qfile_array->fd_array = GF_MALLOC (array_size * sizeof (int), - gf_dht_mt_int32_t); - if (!qfile_array->fd_array) { - gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to allocate memory for " - "tier_qfile_array_t->fd_array"); - goto out; - } - - /* Init all the fds to -1 */ - for (i = 0; i < array_size; i++) { - qfile_array->fd_array[i] = -1; - } - - qfile_array->array_size = array_size; - qfile_array->next_index = 0; - - /* Set exhausted count to list size as the list is empty */ - qfile_array->exhausted_count = qfile_array->array_size; - - ret = 0; + int ret = -1; + tier_qfile_array_t *qfile_array = NULL; + ssize_t i = 0; + + GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out); + + qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t), + gf_tier_mt_qfile_array_t); + if (!qfile_array) { + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to allocate memory for tier_qfile_array_t"); + goto out; + } + + qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int), + gf_dht_mt_int32_t); + if (!qfile_array->fd_array) { + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to allocate memory for " + "tier_qfile_array_t->fd_array"); + goto out; + } + + /* Init all the fds to -1 */ + for (i = 0; i < array_size; i++) { + qfile_array->fd_array[i] = -1; + } + + qfile_array->array_size = array_size; + qfile_array->next_index = 0; + + /* Set exhausted count to list size as the list is empty */ + qfile_array->exhausted_count = qfile_array->array_size; + + ret = 0; out: - if (ret) { - qfile_array_free (qfile_array); - qfile_array = NULL; - } - return qfile_array; + if (ret) { + qfile_array_free(qfile_array); + qfile_array = NULL; + } + return qfile_array; } - /* Checks if the query file list is empty or totally exhausted. 
*/ static gf_boolean_t -is_qfile_array_empty (tier_qfile_array_t *qfile_array) +is_qfile_array_empty(tier_qfile_array_t *qfile_array) { - return (qfile_array->exhausted_count == qfile_array->array_size) ? - _gf_true : _gf_false; + return (qfile_array->exhausted_count == qfile_array->array_size) + ? _gf_true + : _gf_false; } - /* Shifts the next_fd pointer to the next available fd in the list */ static void -shift_next_index (tier_qfile_array_t *qfile_array) +shift_next_index(tier_qfile_array_t *qfile_array) { - int qfile_fd = 0; - int spin_count = 0; + int qfile_fd = 0; + int spin_count = 0; - if (is_qfile_array_empty (qfile_array)) { - return; - } - - do { - /* change next_index in a rotional manner */ - (qfile_array->next_index == (qfile_array->array_size - 1)) ? - qfile_array->next_index = 0 : qfile_array->next_index++; + if (is_qfile_array_empty(qfile_array)) { + return; + } - qfile_fd = (qfile_array->fd_array[qfile_array->next_index]); + do { + /* change next_index in a rotional manner */ + (qfile_array->next_index == (qfile_array->array_size - 1)) + ? qfile_array->next_index = 0 + : qfile_array->next_index++; - spin_count++; + qfile_fd = (qfile_array->fd_array[qfile_array->next_index]); - } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size)); + spin_count++; + } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size)); } /* @@ -149,1844 +147,1707 @@ shift_next_index (tier_qfile_array_t *qfile_array) * < 0 if there was failure * */ static int -read_query_record_list (tier_qfile_array_t *qfile_array, - gfdb_query_record_t **query_record) +read_query_record_list(tier_qfile_array_t *qfile_array, + gfdb_query_record_t **query_record) { - int ret = -1; - int qfile_fd = 0; - - GF_VALIDATE_OR_GOTO ("tier", qfile_array, out); - GF_VALIDATE_OR_GOTO ("tier", qfile_array->fd_array, out); - - do { - if (is_qfile_array_empty (qfile_array)) { - ret = 0; - break; - } - - qfile_fd = qfile_array->fd_array[qfile_array->next_index]; - ret = gfdb_methods.gfdb_read_query_record - (qfile_fd, query_record); - if (ret <= 0) { - /*The qfile_fd has reached EOF or - * there was an error. - * 1. Close the exhausted fd - * 2. increment the exhausted count - * 3. shift next_qfile to next qfile - **/ - sys_close (qfile_fd); - qfile_array->fd_array[qfile_array->next_index] = -1; - qfile_array->exhausted_count++; - /* shift next_qfile to next qfile */ - shift_next_index (qfile_array); - continue; - } else { - /* shift next_qfile to next qfile */ - shift_next_index (qfile_array); - break; - } - } while (1); + int ret = -1; + int qfile_fd = 0; + + GF_VALIDATE_OR_GOTO("tier", qfile_array, out); + GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out); + + do { + if (is_qfile_array_empty(qfile_array)) { + ret = 0; + break; + } + + qfile_fd = qfile_array->fd_array[qfile_array->next_index]; + ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record); + if (ret <= 0) { + /*The qfile_fd has reached EOF or + * there was an error. + * 1. Close the exhausted fd + * 2. increment the exhausted count + * 3. 
shift next_qfile to next qfile + **/ + sys_close(qfile_fd); + qfile_array->fd_array[qfile_array->next_index] = -1; + qfile_array->exhausted_count++; + /* shift next_qfile to next qfile */ + shift_next_index(qfile_array); + continue; + } else { + /* shift next_qfile to next qfile */ + shift_next_index(qfile_array); + break; + } + } while (1); out: - return ret; + return ret; } - /* Check and update the watermark every WM_INTERVAL seconds */ -#define WM_INTERVAL 5 -#define WM_INTERVAL_EMERG 1 +#define WM_INTERVAL 5 +#define WM_INTERVAL_EMERG 1 static int -tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) +tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) { - int ret = -1; - dict_t *dict = NULL; - char *uuid_str = NULL; - uuid_t node_uuid = {0,}; - - GF_VALIDATE_OR_GOTO ("tier", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, defrag, out); - - if (syncop_getxattr (this, loc, &dict, GF_XATTR_NODE_UUID_KEY, - NULL, NULL)) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Unable to get NODE_UUID_KEY %s %s\n", - loc->name, loc->path); - goto out; - } - - if (dict_get_str (dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to get node-uuids for %s", loc->path); - goto out; - } - - - if (gf_uuid_parse (uuid_str, node_uuid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "uuid_parse failed for %s", loc->path); - goto out; - } - - if (gf_uuid_compare (node_uuid, defrag->node_uuid)) { - gf_msg_debug (this->name, 0, - "%s does not belong to this node", loc->path); - ret = 1; - goto out; - } - - ret = 0; + int ret = -1; + dict_t *dict = NULL; + char *uuid_str = NULL; + uuid_t node_uuid = { + 0, + }; + + GF_VALIDATE_OR_GOTO("tier", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, defrag, out); + + if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path); + goto out; + } + + if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to get node-uuids for %s", loc->path); + goto out; + } + + if (gf_uuid_parse(uuid_str, node_uuid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "uuid_parse failed for %s", loc->path); + goto out; + } + + if (gf_uuid_compare(node_uuid, defrag->node_uuid)) { + gf_msg_debug(this->name, 0, "%s does not belong to this node", + loc->path); + ret = 1; + goto out; + } + + ret = 0; out: - if (dict) - dict_unref(dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } int -tier_get_fs_stat (xlator_t *this, loc_t *root_loc) +tier_get_fs_stat(xlator_t *this, loc_t *root_loc) { - int ret = 0; - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - dict_t *xdata = NULL; - struct statvfs statfs = {0, }; - gf_tier_conf_t *tier_conf = NULL; - - - conf = this->private; - if (!conf) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_STATUS, - "conf is NULL"); - ret = -1; - goto exit; - } - - defrag = conf->defrag; - if (!defrag) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_STATUS, - "defrag is NULL"); - ret = -1; - goto exit; - } - - tier_conf = &defrag->tier_conf; - - xdata = dict_new (); - if (!xdata) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - 
"failed to allocate dictionary"); - ret = -1; - goto exit; - } - - ret = dict_set_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set " - GF_INTERNAL_IGNORE_DEEM_STATFS" in dict"); - ret = -1; - goto exit; - } - - /* Find how much free space is on the hot subvolume. - * Then see if that value */ - /* is less than or greater than user defined watermarks. - * Stash results in */ - /* the tier_conf data structure. */ - - ret = syncop_statfs (conf->subvolumes[1], root_loc, &statfs, - xdata, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOG_TIER_STATUS, - "Unable to obtain statfs."); - goto exit; - } - - pthread_mutex_lock (&dm_stat_mutex); - - tier_conf->block_size = statfs.f_bsize; - tier_conf->blocks_total = statfs.f_blocks; - tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree; - - tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used, - statfs.f_blocks); - pthread_mutex_unlock (&dm_stat_mutex); + int ret = 0; + gf_defrag_info_t *defrag = NULL; + dht_conf_t *conf = NULL; + dict_t *xdata = NULL; + struct statvfs statfs = { + 0, + }; + gf_tier_conf_t *tier_conf = NULL; + + conf = this->private; + if (!conf) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, + "conf is NULL"); + ret = -1; + goto exit; + } + + defrag = conf->defrag; + if (!defrag) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, + "defrag is NULL"); + ret = -1; + goto exit; + } + + tier_conf = &defrag->tier_conf; + + xdata = dict_new(); + if (!xdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "failed to allocate dictionary"); + ret = -1; + goto exit; + } + + ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); + ret = -1; + goto exit; + } + + /* Find how much free space is on the hot subvolume. + * Then see if that value */ + /* is less than or greater than user defined watermarks. + * Stash results in */ + /* the tier_conf data structure. 
*/ + + ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS, + "Unable to obtain statfs."); + goto exit; + } + + pthread_mutex_lock(&dm_stat_mutex); + + tier_conf->block_size = statfs.f_bsize; + tier_conf->blocks_total = statfs.f_blocks; + tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree; + + tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used, + statfs.f_blocks); + pthread_mutex_unlock(&dm_stat_mutex); exit: - if (xdata) - dict_unref (xdata); - return ret; + if (xdata) + dict_unref(xdata); + return ret; } static void -tier_send_watermark_event (const char *volname, - tier_watermark_op_t old_wm, - tier_watermark_op_t new_wm) +tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm, + tier_watermark_op_t new_wm) { - if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) { - if (new_wm == TIER_WM_MID) { - gf_event (EVENT_TIER_WATERMARK_RAISED_TO_MID, - "vol=%s", volname); - } else if (new_wm == TIER_WM_HI) { - gf_event (EVENT_TIER_WATERMARK_HI, "vol=%s", volname); - } - } else if (old_wm == TIER_WM_MID) { - if (new_wm == TIER_WM_LOW) { - gf_event (EVENT_TIER_WATERMARK_DROPPED_TO_LOW, - "vol=%s", volname); - } else if (new_wm == TIER_WM_HI) { - gf_event (EVENT_TIER_WATERMARK_HI, "vol=%s", volname); - } - } else if (old_wm == TIER_WM_HI) { - if (new_wm == TIER_WM_MID) { - gf_event (EVENT_TIER_WATERMARK_DROPPED_TO_MID, - "vol=%s", volname); - } else if (new_wm == TIER_WM_LOW) { - gf_event (EVENT_TIER_WATERMARK_DROPPED_TO_LOW, - "vol=%s", volname); - } - } + if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) { + if (new_wm == TIER_WM_MID) { + gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname); + } else if (new_wm == TIER_WM_HI) { + gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); + } + } else if (old_wm == TIER_WM_MID) { + if (new_wm == TIER_WM_LOW) { + gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); + } else if (new_wm == TIER_WM_HI) { + gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); + } + } else if (old_wm == TIER_WM_HI) { + if (new_wm == TIER_WM_MID) { + gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname); + } else if (new_wm == TIER_WM_LOW) { + gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); + } + } } int -tier_check_watermark (xlator_t *this) +tier_check_watermark(xlator_t *this) { - int ret = -1; - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - gf_tier_conf_t *tier_conf = NULL; - tier_watermark_op_t wm = TIER_WM_NONE; + int ret = -1; + gf_defrag_info_t *defrag = NULL; + dht_conf_t *conf = NULL; + gf_tier_conf_t *tier_conf = NULL; + tier_watermark_op_t wm = TIER_WM_NONE; - conf = this->private; - if (!conf) - goto exit; + conf = this->private; + if (!conf) + goto exit; - defrag = conf->defrag; - if (!defrag) - goto exit; + defrag = conf->defrag; + if (!defrag) + goto exit; - tier_conf = &defrag->tier_conf; + tier_conf = &defrag->tier_conf; - if (tier_conf->percent_full < tier_conf->watermark_low) { - wm = TIER_WM_LOW; + if (tier_conf->percent_full < tier_conf->watermark_low) { + wm = TIER_WM_LOW; - } else if (tier_conf->percent_full < tier_conf->watermark_hi) { - wm = TIER_WM_MID; + } else if (tier_conf->percent_full < tier_conf->watermark_hi) { + wm = TIER_WM_MID; - } else { - wm = TIER_WM_HI; - } + } else { + wm = TIER_WM_HI; + } - if (wm != tier_conf->watermark_last) { + if (wm != tier_conf->watermark_last) { + tier_send_watermark_event(tier_conf->volname, 
tier_conf->watermark_last, + wm); - tier_send_watermark_event (tier_conf->volname, - tier_conf->watermark_last, - wm); + tier_conf->watermark_last = wm; + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Tier watermark now %d", wm); + } - tier_conf->watermark_last = wm; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Tier watermark now %d", wm); - } - - ret = 0; + ret = 0; exit: - return ret; + return ret; } - static gf_boolean_t -is_hot_tier_full (gf_tier_conf_t *tier_conf) +is_hot_tier_full(gf_tier_conf_t *tier_conf) { - if (tier_conf && (tier_conf->mode == TIER_MODE_WM) && - (tier_conf->watermark_last == TIER_WM_HI)) - return _gf_true; + if (tier_conf && (tier_conf->mode == TIER_MODE_WM) && + (tier_conf->watermark_last == TIER_WM_HI)) + return _gf_true; - return _gf_false; + return _gf_false; } int -tier_do_migration (xlator_t *this, int promote) +tier_do_migration(xlator_t *this, int promote) { - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - long rand = 0; - int migrate = 0; - gf_tier_conf_t *tier_conf = NULL; + gf_defrag_info_t *defrag = NULL; + dht_conf_t *conf = NULL; + long rand = 0; + int migrate = 0; + gf_tier_conf_t *tier_conf = NULL; - conf = this->private; - if (!conf) - goto exit; + conf = this->private; + if (!conf) + goto exit; - defrag = conf->defrag; - if (!defrag) - goto exit; - - if (tier_check_watermark (this) != 0) { - gf_msg (this->name, GF_LOG_CRITICAL, errno, - DHT_MSG_LOG_TIER_ERROR, - "Failed to get watermark"); - goto exit; - } + defrag = conf->defrag; + if (!defrag) + goto exit; + + if (tier_check_watermark(this) != 0) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR, + "Failed to get watermark"); + goto exit; + } - tier_conf = &defrag->tier_conf; + tier_conf = &defrag->tier_conf; - switch (tier_conf->watermark_last) { + switch (tier_conf->watermark_last) { case TIER_WM_LOW: - migrate = promote ? 1 : 0; - break; + migrate = promote ? 1 : 0; + break; case TIER_WM_HI: - migrate = promote ? 0 : 1; - break; + migrate = promote ? 
0 : 1; + break; case TIER_WM_MID: - /* coverity[DC.WEAK_CRYPTO] */ - rand = random() % 100; - if (promote) { - migrate = (rand > tier_conf->percent_full); - } else { - migrate = (rand <= tier_conf->percent_full); - } - break; - } + /* coverity[DC.WEAK_CRYPTO] */ + rand = random() % 100; + if (promote) { + migrate = (rand > tier_conf->percent_full); + } else { + migrate = (rand <= tier_conf->percent_full); + } + break; + } exit: - return migrate; + return migrate; } int -tier_migrate (xlator_t *this, int is_promotion, dict_t *migrate_data, - loc_t *loc, gf_tier_conf_t *tier_conf) +tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc, + gf_tier_conf_t *tier_conf) { - int ret = -1; - - pthread_mutex_lock (&tier_conf->pause_mutex); - if (is_promotion) - tier_conf->promote_in_progress = 1; - else - tier_conf->demote_in_progress = 1; - pthread_mutex_unlock (&tier_conf->pause_mutex); - - /* Data migration */ - ret = syncop_setxattr (this, loc, migrate_data, 0, - NULL, NULL); - - pthread_mutex_lock (&tier_conf->pause_mutex); - if (is_promotion) - tier_conf->promote_in_progress = 0; - else - tier_conf->demote_in_progress = 0; - pthread_mutex_unlock (&tier_conf->pause_mutex); - - return ret; + int ret = -1; + + pthread_mutex_lock(&tier_conf->pause_mutex); + if (is_promotion) + tier_conf->promote_in_progress = 1; + else + tier_conf->demote_in_progress = 1; + pthread_mutex_unlock(&tier_conf->pause_mutex); + + /* Data migration */ + ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL); + + pthread_mutex_lock(&tier_conf->pause_mutex); + if (is_promotion) + tier_conf->promote_in_progress = 0; + else + tier_conf->demote_in_progress = 0; + pthread_mutex_unlock(&tier_conf->pause_mutex); + + return ret; } /* returns _gf_true: if file can be promoted * returns _gf_false: if file cannot be promoted */ static gf_boolean_t -tier_can_promote_file (xlator_t *this, char const *file_name, - struct iatt *current, gf_defrag_info_t *defrag) +tier_can_promote_file(xlator_t *this, char const *file_name, + struct iatt *current, gf_defrag_info_t *defrag) { - gf_boolean_t ret = _gf_false; - fsblkcnt_t estimated_usage = 0; - - if (defrag->tier_conf.tier_max_promote_size && - (current->ia_size > defrag->tier_conf.tier_max_promote_size)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "File %s (gfid:%s) with size (%lu) exceeds maxsize " - "(%d) for promotion. File will not be promoted.", - file_name, - uuid_utoa(current->ia_gfid), - current->ia_size, - defrag->tier_conf.tier_max_promote_size); - goto err; - } - - /* bypass further validations for TEST mode */ - if (defrag->tier_conf.mode != TIER_MODE_WM) { - ret = _gf_true; - goto err; - } - - /* convert the file size to blocks as per the block size of the - * destination tier - * NOTE: add (block_size - 1) to get the correct block size when - * there is a remainder after a modulo - */ - estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) / - defrag->tier_conf.block_size) + - defrag->tier_conf.blocks_used; - - /* test if the estimated block usage goes above HI watermark */ - if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) >= - defrag->tier_conf.watermark_hi) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Estimated block count consumption on " - "hot tier (%lu) exceeds hi watermark (%d%%). 
" - "File will not be promoted.", - estimated_usage, - defrag->tier_conf.watermark_hi); - goto err; - } + gf_boolean_t ret = _gf_false; + fsblkcnt_t estimated_usage = 0; + + if (defrag->tier_conf.tier_max_promote_size && + (current->ia_size > defrag->tier_conf.tier_max_promote_size)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "File %s (gfid:%s) with size (%lu) exceeds maxsize " + "(%d) for promotion. File will not be promoted.", + file_name, uuid_utoa(current->ia_gfid), current->ia_size, + defrag->tier_conf.tier_max_promote_size); + goto err; + } + + /* bypass further validations for TEST mode */ + if (defrag->tier_conf.mode != TIER_MODE_WM) { ret = _gf_true; + goto err; + } + + /* convert the file size to blocks as per the block size of the + * destination tier + * NOTE: add (block_size - 1) to get the correct block size when + * there is a remainder after a modulo + */ + estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) / + defrag->tier_conf.block_size) + + defrag->tier_conf.blocks_used; + + /* test if the estimated block usage goes above HI watermark */ + if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >= + defrag->tier_conf.watermark_hi) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Estimated block count consumption on " + "hot tier (%lu) exceeds hi watermark (%d%%). " + "File will not be promoted.", + estimated_usage, defrag->tier_conf.watermark_hi); + goto err; + } + ret = _gf_true; err: - return ret; + return ret; } static int -tier_set_migrate_data (dict_t *migrate_data) +tier_set_migrate_data(dict_t *migrate_data) { - int failed = 1; - - - failed = dict_set_str (migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force"); - if (failed) { - goto bail_out; - } - - /* Flag to suggest the xattr call is from migrator */ - failed = dict_set_str (migrate_data, "from.migrator", "yes"); - if (failed) { - goto bail_out; - } - - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - failed = dict_set_str (migrate_data, TIERING_MIGRATION_KEY, "yes"); - if (failed) { - goto bail_out; - } - - failed = 0; + int failed = 1; + + failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force"); + if (failed) { + goto bail_out; + } + + /* Flag to suggest the xattr call is from migrator */ + failed = dict_set_str(migrate_data, "from.migrator", "yes"); + if (failed) { + goto bail_out; + } + + /* Flag to suggest its a tiering migration + * The reason for this dic key-value is that + * promotions and demotions are multithreaded + * so the original frame from gf_defrag_start() + * is not carried. A new frame will be created when + * we do syncop_setxattr(). This does not have the + * frame->root->pid of the original frame. 
So we pass + * this dic key-value when we do syncop_setxattr() to do + * data migration and set the frame->root->pid to + * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before + * calling dht_start_rebalance_task() */ + failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes"); + if (failed) { + goto bail_out; + } + + failed = 0; bail_out: - return failed; + return failed; } static char * -tier_get_parent_path (xlator_t *this, - loc_t *p_loc, - struct iatt *par_stbuf, - int *per_link_status) +tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf, + int *per_link_status) { - int ret = -1; - char *parent_path = NULL; - dict_t *xdata_request = NULL; - dict_t *xdata_response = NULL; - - - xdata_request = dict_new (); - if (!xdata_request) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to create xdata_request dict"); - goto err; - } - ret = dict_set_int32 (xdata_request, - GET_ANCESTRY_PATH_KEY, 42); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to set value to dict : key %s \n", - GET_ANCESTRY_PATH_KEY); - goto err; - } - - ret = syncop_lookup (this, p_loc, par_stbuf, NULL, - xdata_request, &xdata_response); - /* When the parent gfid is a stale entry, the lookup - * will fail and stop the demotion process. - * The parent gfid can be stale when a huge folder is - * deleted while the files within it are being migrated - */ - if (ret == -ESTALE) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_STALE_LOOKUP, - "Stale entry in parent lookup for %s", - uuid_utoa (p_loc->gfid)); - *per_link_status = 1; - goto err; - } else if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOG_TIER_ERROR, - "Error in parent lookup for %s", - uuid_utoa (p_loc->gfid)); - *per_link_status = -1; - goto err; - } - ret = dict_get_str (xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path); - if (ret || !parent_path) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to get parent path for %s", - uuid_utoa (p_loc->gfid)); - *per_link_status = -1; - goto err; - } + int ret = -1; + char *parent_path = NULL; + dict_t *xdata_request = NULL; + dict_t *xdata_response = NULL; + + xdata_request = dict_new(); + if (!xdata_request) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to create xdata_request dict"); + goto err; + } + ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to set value to dict : key %s \n", + GET_ANCESTRY_PATH_KEY); + goto err; + } + + ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request, + &xdata_response); + /* When the parent gfid is a stale entry, the lookup + * will fail and stop the demotion process. 
+ * The parent gfid can be stale when a huge folder is + * deleted while the files within it are being migrated + */ + if (ret == -ESTALE) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, + "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid)); + *per_link_status = 1; + goto err; + } else if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, + "Error in parent lookup for %s", uuid_utoa(p_loc->gfid)); + *per_link_status = -1; + goto err; + } + ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path); + if (ret || !parent_path) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to get parent path for %s", uuid_utoa(p_loc->gfid)); + *per_link_status = -1; + goto err; + } err: - if (xdata_request) { - dict_unref (xdata_request); - } + if (xdata_request) { + dict_unref(xdata_request); + } - if (xdata_response) { - dict_unref (xdata_response); - xdata_response = NULL; - } + if (xdata_response) { + dict_unref(xdata_response); + xdata_response = NULL; + } - return parent_path; + return parent_path; } static int -tier_get_file_name_and_path (xlator_t *this, - uuid_t gfid, - gfdb_link_info_t *link_info, - char const *parent_path, - loc_t *loc, - int *per_link_status) +tier_get_file_name_and_path(xlator_t *this, uuid_t gfid, + gfdb_link_info_t *link_info, + char const *parent_path, loc_t *loc, + int *per_link_status) { - int ret = -1; - - loc->name = gf_strdup (link_info->file_name); - if (!loc->name) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Memory " - "allocation failed for %s", - uuid_utoa (gfid)); - *per_link_status = -1; - goto err; - } - ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Failed to " - "construct file path for %s %s\n", - parent_path, loc->name); - *per_link_status = -1; - goto err; - } - - ret = 0; + int ret = -1; + + loc->name = gf_strdup(link_info->file_name); + if (!loc->name) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Memory " + "allocation failed for %s", + uuid_utoa(gfid)); + *per_link_status = -1; + goto err; + } + ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to " + "construct file path for %s %s\n", + parent_path, loc->name); + *per_link_status = -1; + goto err; + } + + ret = 0; err: - return ret; + return ret; } static int -tier_lookup_file (xlator_t *this, - loc_t *p_loc, - loc_t *loc, - struct iatt *current, - int *per_link_status) +tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current, + int *per_link_status) { - int ret = -1; - - ret = syncop_lookup (this, loc, current, NULL, NULL, NULL); - - /* The file may be deleted even when the parent - * is available and the lookup will - * return a stale entry which would stop the - * migration. so if its a stale entry, then skip - * the file and keep migrating. 
- */ - if (ret == -ESTALE) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_STALE_LOOKUP, - "Stale lookup for %s", - uuid_utoa (p_loc->gfid)); - *per_link_status = 1; - goto err; - } else if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOG_TIER_ERROR, "Failed to " - "lookup file %s\n", loc->name); - *per_link_status = -1; - goto err; - } - ret = 0; + int ret = -1; + + ret = syncop_lookup(this, loc, current, NULL, NULL, NULL); + + /* The file may be deleted even when the parent + * is available and the lookup will + * return a stale entry which would stop the + * migration. so if its a stale entry, then skip + * the file and keep migrating. + */ + if (ret == -ESTALE) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, + "Stale lookup for %s", uuid_utoa(p_loc->gfid)); + *per_link_status = 1; + goto err; + } else if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, + "Failed to " + "lookup file %s\n", + loc->name); + *per_link_status = -1; + goto err; + } + ret = 0; err: - return ret; + return ret; } static gf_boolean_t -tier_is_file_already_at_destination (xlator_t *src_subvol, - query_cbk_args_t *query_cbk_args, - dht_conf_t *conf, - int *per_link_status) +tier_is_file_already_at_destination(xlator_t *src_subvol, + query_cbk_args_t *query_cbk_args, + dht_conf_t *conf, int *per_link_status) { - gf_boolean_t at_destination = _gf_true; - - if (src_subvol == NULL) { - *per_link_status = 1; - goto err; - } - if (query_cbk_args->is_promotion && - src_subvol == conf->subvolumes[1]) { - *per_link_status = 1; - goto err; - } - - if (!query_cbk_args->is_promotion && - src_subvol == conf->subvolumes[0]) { - *per_link_status = 1; - goto err; - } - at_destination = _gf_false; + gf_boolean_t at_destination = _gf_true; + + if (src_subvol == NULL) { + *per_link_status = 1; + goto err; + } + if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) { + *per_link_status = 1; + goto err; + } + + if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) { + *per_link_status = 1; + goto err; + } + at_destination = _gf_false; err: - return at_destination; + return at_destination; } static void -tier_update_migration_counters (query_cbk_args_t *query_cbk_args, - gf_defrag_info_t *defrag, - uint64_t *total_migrated_bytes, - int *total_files) +tier_update_migration_counters(query_cbk_args_t *query_cbk_args, + gf_defrag_info_t *defrag, + uint64_t *total_migrated_bytes, int *total_files) { - if (query_cbk_args->is_promotion) { - defrag->total_files_promoted++; - *total_migrated_bytes += - defrag->tier_conf.st_last_promoted_size; - pthread_mutex_lock (&dm_stat_mutex); - defrag->tier_conf.blocks_used += - defrag->tier_conf.st_last_promoted_size; - pthread_mutex_unlock (&dm_stat_mutex); - } else { - defrag->total_files_demoted++; - *total_migrated_bytes += - defrag->tier_conf.st_last_demoted_size; - pthread_mutex_lock (&dm_stat_mutex); - defrag->tier_conf.blocks_used -= - defrag->tier_conf.st_last_demoted_size; - pthread_mutex_unlock (&dm_stat_mutex); - } - if (defrag->tier_conf.blocks_total) { - pthread_mutex_lock (&dm_stat_mutex); - defrag->tier_conf.percent_full = - GF_PERCENTAGE (defrag->tier_conf.blocks_used, - defrag->tier_conf.blocks_total); - pthread_mutex_unlock (&dm_stat_mutex); - } - - (*total_files)++; + if (query_cbk_args->is_promotion) { + defrag->total_files_promoted++; + *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size; + pthread_mutex_lock(&dm_stat_mutex); + defrag->tier_conf.blocks_used += 
defrag->tier_conf + .st_last_promoted_size; + pthread_mutex_unlock(&dm_stat_mutex); + } else { + defrag->total_files_demoted++; + *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size; + pthread_mutex_lock(&dm_stat_mutex); + defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size; + pthread_mutex_unlock(&dm_stat_mutex); + } + if (defrag->tier_conf.blocks_total) { + pthread_mutex_lock(&dm_stat_mutex); + defrag->tier_conf.percent_full = GF_PERCENTAGE( + defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total); + pthread_mutex_unlock(&dm_stat_mutex); + } + + (*total_files)++; } static int -tier_migrate_link (xlator_t *this, - dht_conf_t *conf, - uuid_t gfid, - gfdb_link_info_t *link_info, - gf_defrag_info_t *defrag, - query_cbk_args_t *query_cbk_args, - dict_t *migrate_data, - int *per_link_status, - int *total_files, - uint64_t *total_migrated_bytes) +tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid, + gfdb_link_info_t *link_info, gf_defrag_info_t *defrag, + query_cbk_args_t *query_cbk_args, dict_t *migrate_data, + int *per_link_status, int *total_files, + uint64_t *total_migrated_bytes) { - int ret = -1; - struct iatt current = {0,}; - struct iatt par_stbuf = {0,}; - loc_t p_loc = {0,}; - loc_t loc = {0,}; - xlator_t *src_subvol = NULL; - inode_t *linked_inode = NULL; - char *parent_path = NULL; - - - /* Lookup for parent and get the path of parent */ - gf_uuid_copy (p_loc.gfid, link_info->pargfid); - p_loc.inode = inode_new (defrag->root_inode->table); - if (!p_loc.inode) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to create reference to inode" - " for %s", uuid_utoa (p_loc.gfid)); - - *per_link_status = -1; - goto err; - } - - parent_path = tier_get_parent_path (this, &p_loc, &par_stbuf, - per_link_status); - if (!parent_path) { - goto err; - } - - linked_inode = inode_link (p_loc.inode, NULL, NULL, &par_stbuf); - inode_unref (p_loc.inode); - p_loc.inode = linked_inode; - - - /* Preparing File Inode */ - gf_uuid_copy (loc.gfid, gfid); - loc.inode = inode_new (defrag->root_inode->table); - gf_uuid_copy (loc.pargfid, link_info->pargfid); - loc.parent = inode_ref (p_loc.inode); - - /* Get filename and Construct file path */ - if (tier_get_file_name_and_path (this, gfid, link_info, - parent_path, &loc, per_link_status) != 0) { - goto err; - } - gf_uuid_copy (loc.parent->gfid, link_info->pargfid); - - /* lookup file inode */ - if (tier_lookup_file (this, &p_loc, &loc, - ¤t, per_link_status) != 0) { - goto err; - } - - if (query_cbk_args->is_promotion) { - if (!tier_can_promote_file (this, - link_info->file_name, - ¤t, - defrag)) { - *per_link_status = 1; - goto err; - } - } - - linked_inode = inode_link (loc.inode, NULL, NULL, ¤t); - inode_unref (loc.inode); - loc.inode = linked_inode; - - - /* - * Do not promote/demote if file already is where it - * should be. It means another brick moved the file - * so is not an error. So we set per_link_status = 1 - * so that we ignore counting this. - */ - src_subvol = dht_subvol_get_cached (this, loc.inode); - - if (tier_is_file_already_at_destination (src_subvol, query_cbk_args, - conf, per_link_status)) { - goto err; - } - - gf_msg_debug (this->name, 0, "Tier %s: src_subvol %s file %s", - (query_cbk_args->is_promotion ? 
"promote" : "demote"), - src_subvol->name, - loc.path); - - - ret = tier_check_same_node (this, &loc, defrag); - if (ret != 0) { - if (ret < 0) { - *per_link_status = -1; - goto err; - } - ret = 0; - /* By setting per_link_status to 1 we are - * ignoring this status and will not be counting - * this file for migration */ - *per_link_status = 1; - goto err; - } - - gf_uuid_copy (loc.gfid, loc.inode->gfid); - - if (gf_defrag_get_pause_state (&defrag->tier_conf) != TIER_RUNNING) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Tiering paused. " - "Exiting tier_migrate_link"); - goto err; - } - - ret = tier_migrate (this, query_cbk_args->is_promotion, - migrate_data, &loc, &defrag->tier_conf); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOG_TIER_ERROR, "Failed to " - "migrate %s ", loc.path); - *per_link_status = -1; - goto err; + int ret = -1; + struct iatt current = { + 0, + }; + struct iatt par_stbuf = { + 0, + }; + loc_t p_loc = { + 0, + }; + loc_t loc = { + 0, + }; + xlator_t *src_subvol = NULL; + inode_t *linked_inode = NULL; + char *parent_path = NULL; + + /* Lookup for parent and get the path of parent */ + gf_uuid_copy(p_loc.gfid, link_info->pargfid); + p_loc.inode = inode_new(defrag->root_inode->table); + if (!p_loc.inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to create reference to inode" + " for %s", + uuid_utoa(p_loc.gfid)); + + *per_link_status = -1; + goto err; + } + + parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf, + per_link_status); + if (!parent_path) { + goto err; + } + + linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf); + inode_unref(p_loc.inode); + p_loc.inode = linked_inode; + + /* Preparing File Inode */ + gf_uuid_copy(loc.gfid, gfid); + loc.inode = inode_new(defrag->root_inode->table); + gf_uuid_copy(loc.pargfid, link_info->pargfid); + loc.parent = inode_ref(p_loc.inode); + + /* Get filename and Construct file path */ + if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc, + per_link_status) != 0) { + goto err; + } + gf_uuid_copy(loc.parent->gfid, link_info->pargfid); + + /* lookup file inode */ + if (tier_lookup_file(this, &p_loc, &loc, ¤t, per_link_status) != 0) { + goto err; + } + + if (query_cbk_args->is_promotion) { + if (!tier_can_promote_file(this, link_info->file_name, ¤t, + defrag)) { + *per_link_status = 1; + goto err; + } + } + + linked_inode = inode_link(loc.inode, NULL, NULL, ¤t); + inode_unref(loc.inode); + loc.inode = linked_inode; + + /* + * Do not promote/demote if file already is where it + * should be. It means another brick moved the file + * so is not an error. So we set per_link_status = 1 + * so that we ignore counting this. + */ + src_subvol = dht_subvol_get_cached(this, loc.inode); + + if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf, + per_link_status)) { + goto err; + } + + gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s", + (query_cbk_args->is_promotion ? 
"promote" : "demote"), + src_subvol->name, loc.path); + + ret = tier_check_same_node(this, &loc, defrag); + if (ret != 0) { + if (ret < 0) { + *per_link_status = -1; + goto err; } - - tier_update_migration_counters (query_cbk_args, defrag, - total_migrated_bytes, total_files); - ret = 0; + /* By setting per_link_status to 1 we are + * ignoring this status and will not be counting + * this file for migration */ + *per_link_status = 1; + goto err; + } + + gf_uuid_copy(loc.gfid, loc.inode->gfid); + + if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. " + "Exiting tier_migrate_link"); + goto err; + } + + ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc, + &defrag->tier_conf); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, + "Failed to " + "migrate %s ", + loc.path); + *per_link_status = -1; + goto err; + } + + tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes, + total_files); + + ret = 0; err: - GF_FREE ((char *) loc.name); - loc.name = NULL; - loc_wipe (&loc); - loc_wipe (&p_loc); - - if ((*total_files >= defrag->tier_conf.max_migrate_files) - || (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Reached cycle migration limit." - "migrated bytes %"PRId64" files %d", - *total_migrated_bytes, - *total_files); - ret = -1; - } - - return ret; + GF_FREE((char *)loc.name); + loc.name = NULL; + loc_wipe(&loc); + loc_wipe(&p_loc); + + if ((*total_files >= defrag->tier_conf.max_migrate_files) || + (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Reached cycle migration limit." 
+ "migrated bytes %" PRId64 " files %d", + *total_migrated_bytes, *total_files); + ret = -1; + } + + return ret; } - static int -tier_migrate_using_query_file (void *_args) +tier_migrate_using_query_file(void *_args) { - int ret = -1; - query_cbk_args_t *query_cbk_args = (query_cbk_args_t *) _args; - xlator_t *this = NULL; - gf_defrag_info_t *defrag = NULL; - gfdb_query_record_t *query_record = NULL; - gfdb_link_info_t *link_info = NULL; - dict_t *migrate_data = NULL; - /* - * per_file_status and per_link_status - * 0 : success - * -1 : failure - * 1 : ignore the status and don't count for migration - * */ - int per_file_status = 0; - int per_link_status = 0; - int total_status = 0; - dht_conf_t *conf = NULL; - uint64_t total_migrated_bytes = 0; - int total_files = 0; - loc_t root_loc = { 0 }; - gfdb_time_t start_time = { 0 }; - gfdb_time_t current_time = { 0 }; - int total_time = 0; - int max_time = 0; - gf_boolean_t emergency_demote_mode = _gf_false; - - - GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out); - GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out); - this = query_cbk_args->this; - GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->defrag, out); - GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->qfile_array, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - conf = this->private; - - defrag = query_cbk_args->defrag; - migrate_data = dict_new (); - if (!migrate_data) - goto out; - - emergency_demote_mode = (!query_cbk_args->is_promotion && - is_hot_tier_full(&defrag->tier_conf)); - - if (tier_set_migrate_data (migrate_data) != 0) { - goto out; + int ret = -1; + query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args; + xlator_t *this = NULL; + gf_defrag_info_t *defrag = NULL; + gfdb_query_record_t *query_record = NULL; + gfdb_link_info_t *link_info = NULL; + dict_t *migrate_data = NULL; + /* + * per_file_status and per_link_status + * 0 : success + * -1 : failure + * 1 : ignore the status and don't count for migration + * */ + int per_file_status = 0; + int per_link_status = 0; + int total_status = 0; + dht_conf_t *conf = NULL; + uint64_t total_migrated_bytes = 0; + int total_files = 0; + loc_t root_loc = {0}; + gfdb_time_t start_time = {0}; + gfdb_time_t current_time = {0}; + int total_time = 0; + int max_time = 0; + gf_boolean_t emergency_demote_mode = _gf_false; + + GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); + GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); + this = query_cbk_args->this; + GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out); + GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + conf = this->private; + + defrag = query_cbk_args->defrag; + migrate_data = dict_new(); + if (!migrate_data) + goto out; + + emergency_demote_mode = (!query_cbk_args->is_promotion && + is_hot_tier_full(&defrag->tier_conf)); + + if (tier_set_migrate_data(migrate_data) != 0) { + goto out; + } + + dht_build_root_loc(defrag->root_inode, &root_loc); + + ret = gettimeofday(&start_time, NULL); + if (query_cbk_args->is_promotion) { + max_time = defrag->tier_conf.tier_promote_frequency; + } else { + max_time = defrag->tier_conf.tier_demote_frequency; + } + + /* Per file */ + while ((ret = read_query_record_list(query_cbk_args->qfile_array, + &query_record)) != 0) { + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to fetch query record " + "from query file"); + goto out; } - dht_build_root_loc (defrag->root_inode, &root_loc); - - ret = 
gettimeofday (&start_time, NULL); - if (query_cbk_args->is_promotion) { - max_time = defrag->tier_conf.tier_promote_frequency; - } else { - max_time = defrag->tier_conf.tier_demote_frequency; + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Exiting tier migration as" + "defrag status is not started"); + goto out; } - /* Per file */ - while ((ret = read_query_record_list (query_cbk_args->qfile_array, - &query_record)) != 0) { - - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to fetch query record " - "from query file"); - goto out; - } - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Exiting tier migration as" - "defrag status is not started"); - goto out; - } - - ret = gettimeofday (¤t_time, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Could not get current time."); - goto out; - } + ret = gettimeofday(¤t_time, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Could not get current time."); + goto out; + } - total_time = current_time.tv_sec - start_time.tv_sec; - if (total_time > max_time) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Max cycle time reached. Exiting migration."); - goto out; - } + total_time = current_time.tv_sec - start_time.tv_sec; + if (total_time > max_time) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Max cycle time reached. Exiting migration."); + goto out; + } - per_file_status = 0; - per_link_status = 0; + per_file_status = 0; + per_link_status = 0; - if (gf_defrag_get_pause_state (&defrag->tier_conf) - != TIER_RUNNING) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Tiering paused. " - "Exiting tier_migrate_using_query_file"); - break; - } + if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Tiering paused. " + "Exiting tier_migrate_using_query_file"); + break; + } - if (defrag->tier_conf.mode == TIER_MODE_WM) { - ret = tier_get_fs_stat (this, &root_loc); - if (ret != 0) { - gfdb_methods.gfdb_query_record_free (query_record); - query_record = NULL; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_STATUS, - "tier_get_fs_stat() FAILED ... " - "skipping file migrations until next cycle"); - break; - } - - if (!tier_do_migration (this, query_cbk_args->is_promotion)) { - gfdb_methods.gfdb_query_record_free (query_record); - query_record = NULL; - - /* We have crossed the high watermark. Stop processing - * files if this is a promotion cycle so demotion gets - * a chance to start if not already running*/ - - if (query_cbk_args->is_promotion && - is_hot_tier_full (&defrag->tier_conf)) { - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "High watermark crossed during " - "promotion. Exiting " - "tier_migrate_using_query_file"); - break; - } - continue; - } - } + if (defrag->tier_conf.mode == TIER_MODE_WM) { + ret = tier_get_fs_stat(this, &root_loc); + if (ret != 0) { + gfdb_methods.gfdb_query_record_free(query_record); + query_record = NULL; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, + "tier_get_fs_stat() FAILED ... 
" + "skipping file migrations until next cycle"); + break; + } - per_link_status = 0; + if (!tier_do_migration(this, query_cbk_args->is_promotion)) { + gfdb_methods.gfdb_query_record_free(query_record); + query_record = NULL; - /* For now we only support single link migration. And we will - * ignore other hard links in the link info list of query record - * TODO: Multiple hard links migration */ - if (!list_empty (&query_record->link_list)) { - link_info = list_first_entry (&query_record->link_list, - gfdb_link_info_t, list); - } - if (link_info != NULL) { - if (tier_migrate_link (this, - conf, - query_record->gfid, - link_info, - defrag, - query_cbk_args, - migrate_data, - &per_link_status, - &total_files, - &total_migrated_bytes) != 0) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "%s failed for %s(gfid:%s)", - (query_cbk_args->is_promotion ? - "Promotion" : "Demotion"), - link_info->file_name, - uuid_utoa (query_record->gfid)); - } - } - per_file_status = per_link_status; - - if (per_file_status < 0) {/* Failure */ - pthread_mutex_lock (&dm_stat_mutex); - defrag->total_failures++; - pthread_mutex_unlock (&dm_stat_mutex); - } else if (per_file_status == 0) {/* Success */ - pthread_mutex_lock (&dm_stat_mutex); - defrag->total_files++; - pthread_mutex_unlock (&dm_stat_mutex); - } else if (per_file_status == 1) {/* Ignore */ - per_file_status = 0; - /* Since this attempt was ignored we - * decrement the lookup count*/ - pthread_mutex_lock (&dm_stat_mutex); - defrag->num_files_lookedup--; - pthread_mutex_unlock (&dm_stat_mutex); + /* We have crossed the high watermark. Stop processing + * files if this is a promotion cycle so demotion gets + * a chance to start if not already running*/ + + if (query_cbk_args->is_promotion && + is_hot_tier_full(&defrag->tier_conf)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "High watermark crossed during " + "promotion. Exiting " + "tier_migrate_using_query_file"); + break; } - total_status = total_status + per_file_status; - per_link_status = 0; - per_file_status = 0; - - gfdb_methods.gfdb_query_record_free (query_record); - query_record = NULL; + continue; + } + } + + per_link_status = 0; + + /* For now we only support single link migration. And we will + * ignore other hard links in the link info list of query record + * TODO: Multiple hard links migration */ + if (!list_empty(&query_record->link_list)) { + link_info = list_first_entry(&query_record->link_list, + gfdb_link_info_t, list); + } + if (link_info != NULL) { + if (tier_migrate_link(this, conf, query_record->gfid, link_info, + defrag, query_cbk_args, migrate_data, + &per_link_status, &total_files, + &total_migrated_bytes) != 0) { + gf_msg( + this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "%s failed for %s(gfid:%s)", + (query_cbk_args->is_promotion ? 
"Promotion" : "Demotion"), + link_info->file_name, uuid_utoa(query_record->gfid)); + } + } + per_file_status = per_link_status; + + if (per_file_status < 0) { /* Failure */ + pthread_mutex_lock(&dm_stat_mutex); + defrag->total_failures++; + pthread_mutex_unlock(&dm_stat_mutex); + } else if (per_file_status == 0) { /* Success */ + pthread_mutex_lock(&dm_stat_mutex); + defrag->total_files++; + pthread_mutex_unlock(&dm_stat_mutex); + } else if (per_file_status == 1) { /* Ignore */ + per_file_status = 0; + /* Since this attempt was ignored we + * decrement the lookup count*/ + pthread_mutex_lock(&dm_stat_mutex); + defrag->num_files_lookedup--; + pthread_mutex_unlock(&dm_stat_mutex); + } + total_status = total_status + per_file_status; + per_link_status = 0; + per_file_status = 0; + + gfdb_methods.gfdb_query_record_free(query_record); + query_record = NULL; - /* If we are demoting and the entry watermark was HI, then - * we are done with emergency demotions if the current - * watermark has fallen below hi-watermark level - */ - if (emergency_demote_mode) { - if (tier_check_watermark (this) == 0) { - if (!is_hot_tier_full (&defrag->tier_conf)) { - break; - } - } + /* If we are demoting and the entry watermark was HI, then + * we are done with emergency demotions if the current + * watermark has fallen below hi-watermark level + */ + if (emergency_demote_mode) { + if (tier_check_watermark(this) == 0) { + if (!is_hot_tier_full(&defrag->tier_conf)) { + break; } + } } + } out: - if (migrate_data) - dict_unref (migrate_data); + if (migrate_data) + dict_unref(migrate_data); - gfdb_methods.gfdb_query_record_free (query_record); - query_record = NULL; + gfdb_methods.gfdb_query_record_free(query_record); + query_record = NULL; - return total_status; + return total_status; } - /* This is the call back function per record/file from data base */ static int -tier_gf_query_callback (gfdb_query_record_t *gfdb_query_record, - void *_args) { - int ret = -1; - query_cbk_args_t *query_cbk_args = _args; - - GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out); - GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->defrag, out); - GF_VALIDATE_OR_GOTO ("tier", (query_cbk_args->query_fd > 0), out); - - ret = gfdb_methods.gfdb_write_query_record (query_cbk_args->query_fd, - gfdb_query_record); - if (ret) { - gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed writing query record to query file"); - goto out; - } - - pthread_mutex_lock (&dm_stat_mutex); - query_cbk_args->defrag->num_files_lookedup++; - pthread_mutex_unlock (&dm_stat_mutex); - - ret = 0; +tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args) +{ + int ret = -1; + query_cbk_args_t *query_cbk_args = _args; + + GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); + GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out); + GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out); + + ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd, + gfdb_query_record); + if (ret) { + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed writing query record to query file"); + goto out; + } + + pthread_mutex_lock(&dm_stat_mutex); + query_cbk_args->defrag->num_files_lookedup++; + pthread_mutex_unlock(&dm_stat_mutex); + + ret = 0; out: - return ret; + return ret; } - - - /* Create query file in tier process */ static int -tier_process_self_query (tier_brick_list_t *local_brick, void *args) +tier_process_self_query(tier_brick_list_t *local_brick, void *args) { - int ret = -1; - char *db_path = NULL; - 
query_cbk_args_t *query_cbk_args = NULL; - xlator_t *this = NULL; - gfdb_conn_node_t *conn_node = NULL; - dict_t *params_dict = NULL; - dict_t *ctr_ipc_dict = NULL; - gfdb_brick_info_t *gfdb_brick_info = args; - - /*Init of all the essentials*/ - GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info , out); - query_cbk_args = gfdb_brick_info->_query_cbk_args; - - GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out); - this = query_cbk_args->this; - - GF_VALIDATE_OR_GOTO (this->name, - gfdb_brick_info->_query_cbk_args, out); - - GF_VALIDATE_OR_GOTO (this->name, local_brick, out); - - GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out); - - GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out); - - db_path = local_brick->brick_db_path; - - /*Preparing DB parameters before init_db i.e getting db connection*/ - params_dict = dict_new (); - if (!params_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "DB Params cannot initialized"); - goto out; - } - SET_DB_PARAM_TO_DICT(this->name, params_dict, - (char *) gfdb_methods.get_db_path_key(), - db_path, ret, out); - - /*Get the db connection*/ - conn_node = gfdb_methods.init_db ((void *)params_dict, dht_tier_db_type); - if (!conn_node) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "FATAL: Failed initializing db operations"); - goto out; - } - - /* Query for eligible files from db */ - query_cbk_args->query_fd = open (local_brick->qfile_path, - O_WRONLY | O_CREAT | O_APPEND, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (query_cbk_args->query_fd < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - DHT_MSG_LOG_TIER_ERROR, - "Failed to open query file %s", - local_brick->qfile_path); - goto out; - } - if (!gfdb_brick_info->_gfdb_promote) { - if (query_cbk_args->defrag->tier_conf.watermark_last == - TIER_WM_HI) { - /* emergency demotion mode */ - ret = gfdb_methods.find_all (conn_node, - tier_gf_query_callback, - (void *)query_cbk_args, - query_cbk_args->defrag->tier_conf. 
- query_limit); - } else { - if (query_cbk_args->defrag->write_freq_threshold == 0 && - query_cbk_args->defrag->read_freq_threshold == 0) { - ret = gfdb_methods.find_unchanged_for_time ( - conn_node, - tier_gf_query_callback, - (void *)query_cbk_args, - gfdb_brick_info->time_stamp); - } else { - ret = gfdb_methods.find_unchanged_for_time_freq ( - conn_node, - tier_gf_query_callback, - (void *)query_cbk_args, - gfdb_brick_info->time_stamp, - query_cbk_args->defrag-> - write_freq_threshold, - query_cbk_args->defrag-> - read_freq_threshold, - _gf_false); - } - } + int ret = -1; + char *db_path = NULL; + query_cbk_args_t *query_cbk_args = NULL; + xlator_t *this = NULL; + gfdb_conn_node_t *conn_node = NULL; + dict_t *params_dict = NULL; + dict_t *ctr_ipc_dict = NULL; + gfdb_brick_info_t *gfdb_brick_info = args; + + /*Init of all the essentials*/ + GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); + query_cbk_args = gfdb_brick_info->_query_cbk_args; + + GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); + this = query_cbk_args->this; + + GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); + + db_path = local_brick->brick_db_path; + + /*Preparing DB parameters before init_db i.e getting db connection*/ + params_dict = dict_new(); + if (!params_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "DB Params cannot initialized"); + goto out; + } + SET_DB_PARAM_TO_DICT(this->name, params_dict, + (char *)gfdb_methods.get_db_path_key(), db_path, ret, + out); + + /*Get the db connection*/ + conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); + if (!conn_node) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "FATAL: Failed initializing db operations"); + goto out; + } + + /* Query for eligible files from db */ + query_cbk_args->query_fd = open(local_brick->qfile_path, + O_WRONLY | O_CREAT | O_APPEND, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (query_cbk_args->query_fd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, + "Failed to open query file %s", local_brick->qfile_path); + goto out; + } + if (!gfdb_brick_info->_gfdb_promote) { + if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) { + /* emergency demotion mode */ + ret = gfdb_methods.find_all( + conn_node, tier_gf_query_callback, (void *)query_cbk_args, + query_cbk_args->defrag->tier_conf.query_limit); } else { - if (query_cbk_args->defrag->write_freq_threshold == 0 && - query_cbk_args->defrag->read_freq_threshold == 0) { - ret = gfdb_methods.find_recently_changed_files ( - conn_node, - tier_gf_query_callback, - (void *)query_cbk_args, - gfdb_brick_info->time_stamp); - } else { - ret = gfdb_methods.find_recently_changed_files_freq ( - conn_node, - tier_gf_query_callback, - (void *)query_cbk_args, - gfdb_brick_info->time_stamp, - query_cbk_args->defrag->write_freq_threshold, - query_cbk_args->defrag->read_freq_threshold, - _gf_false); - } - } - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "FATAL: query from db failed"); - goto out; - } - - /*Clear the heat on the DB entries*/ - /*Preparing ctr_ipc_dict*/ - ctr_ipc_dict = dict_new (); - if (!ctr_ipc_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_dict cannot initialized"); - goto out; - } - - SET_DB_PARAM_TO_DICT(this->name, 
ctr_ipc_dict, - GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_CLEAR_OPS, - ret, out); - - ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, - NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Failed clearing the heat " - "on db %s error %d", local_brick->brick_db_path, ret); - goto out; - } - - ret = 0; + if (query_cbk_args->defrag->write_freq_threshold == 0 && + query_cbk_args->defrag->read_freq_threshold == 0) { + ret = gfdb_methods.find_unchanged_for_time( + conn_node, tier_gf_query_callback, (void *)query_cbk_args, + gfdb_brick_info->time_stamp); + } else { + ret = gfdb_methods.find_unchanged_for_time_freq( + conn_node, tier_gf_query_callback, (void *)query_cbk_args, + gfdb_brick_info->time_stamp, + query_cbk_args->defrag->write_freq_threshold, + query_cbk_args->defrag->read_freq_threshold, _gf_false); + } + } + } else { + if (query_cbk_args->defrag->write_freq_threshold == 0 && + query_cbk_args->defrag->read_freq_threshold == 0) { + ret = gfdb_methods.find_recently_changed_files( + conn_node, tier_gf_query_callback, (void *)query_cbk_args, + gfdb_brick_info->time_stamp); + } else { + ret = gfdb_methods.find_recently_changed_files_freq( + conn_node, tier_gf_query_callback, (void *)query_cbk_args, + gfdb_brick_info->time_stamp, + query_cbk_args->defrag->write_freq_threshold, + query_cbk_args->defrag->read_freq_threshold, _gf_false); + } + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "FATAL: query from db failed"); + goto out; + } + + /*Clear the heat on the DB entries*/ + /*Preparing ctr_ipc_dict*/ + ctr_ipc_dict = dict_new(); + if (!ctr_ipc_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "ctr_ipc_dict cannot initialized"); + goto out; + } + + SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, + GFDB_IPC_CTR_CLEAR_OPS, ret, out); + + ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed clearing the heat " + "on db %s error %d", + local_brick->brick_db_path, ret); + goto out; + } + + ret = 0; out: - if (params_dict) { - dict_unref (params_dict); - params_dict = NULL; - } - - if (ctr_ipc_dict) { - dict_unref (ctr_ipc_dict); - ctr_ipc_dict = NULL; - } - - if (query_cbk_args && query_cbk_args->query_fd >= 0) { - sys_close (query_cbk_args->query_fd); - query_cbk_args->query_fd = -1; - } - gfdb_methods.fini_db (conn_node); - - return ret; + if (params_dict) { + dict_unref(params_dict); + params_dict = NULL; + } + + if (ctr_ipc_dict) { + dict_unref(ctr_ipc_dict); + ctr_ipc_dict = NULL; + } + + if (query_cbk_args && query_cbk_args->query_fd >= 0) { + sys_close(query_cbk_args->query_fd); + query_cbk_args->query_fd = -1; + } + gfdb_methods.fini_db(conn_node); + + return ret; } - - - - /*Ask CTR to create the query file*/ static int -tier_process_ctr_query (tier_brick_list_t *local_brick, void *args) +tier_process_ctr_query(tier_brick_list_t *local_brick, void *args) { - int ret = -1; - query_cbk_args_t *query_cbk_args = NULL; - xlator_t *this = NULL; - dict_t *ctr_ipc_in_dict = NULL; - dict_t *ctr_ipc_out_dict = NULL; - gfdb_brick_info_t *gfdb_brick_info = args; - gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; - int count = 0; + int ret = -1; + query_cbk_args_t *query_cbk_args = NULL; + xlator_t *this = NULL; + dict_t *ctr_ipc_in_dict = NULL; + dict_t *ctr_ipc_out_dict = NULL; + gfdb_brick_info_t *gfdb_brick_info = args; + gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; + int 
count = 0; + + /*Init of all the essentials*/ + GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); + query_cbk_args = gfdb_brick_info->_query_cbk_args; + + GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); + this = query_cbk_args->this; + + GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); + + /*Preparing ctr_ipc_in_dict*/ + ctr_ipc_in_dict = dict_new(); + if (!ctr_ipc_in_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "ctr_ipc_in_dict cannot initialized"); + goto out; + } + + ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t), + gf_tier_mt_ipc_ctr_params_t); + if (!ipc_ctr_params) { + goto out; + } + + /* set all the query params*/ + ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; + + ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag + ->write_freq_threshold; + + ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag + ->read_freq_threshold; + + ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit; + + ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote && + query_cbk_args->defrag->tier_conf + .watermark_last == TIER_WM_HI); + + memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp, + sizeof(gfdb_time_t)); + + SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, + GFDB_IPC_CTR_QUERY_OPS, ret, out); + + SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, + GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path, + ret, out); + + ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, + ipc_ctr_params, sizeof(*ipc_ctr_params)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, + "Failed setting %s to params dictionary", + GFDB_IPC_CTR_GET_QUERY_PARAMS); + GF_FREE(ipc_ctr_params); + goto out; + } + ipc_ctr_params = NULL; + + ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict, + &ctr_ipc_out_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR, + "Failed query on %s ret %d", local_brick->brick_db_path, ret); + goto out; + } + + ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, + &count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed getting count " + "of records on %s", + local_brick->brick_db_path); + goto out; + } + + if (count < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed query on %s", local_brick->brick_db_path); + ret = -1; + goto out; + } + + pthread_mutex_lock(&dm_stat_mutex); + query_cbk_args->defrag->num_files_lookedup = count; + pthread_mutex_unlock(&dm_stat_mutex); + + ret = 0; +out: - /*Init of all the essentials*/ - GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info , out); - query_cbk_args = gfdb_brick_info->_query_cbk_args; + if (ctr_ipc_in_dict) { + dict_unref(ctr_ipc_in_dict); + ctr_ipc_in_dict = NULL; + } - GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out); - this = query_cbk_args->this; + if (ctr_ipc_out_dict) { + dict_unref(ctr_ipc_out_dict); + ctr_ipc_out_dict = NULL; + } - GF_VALIDATE_OR_GOTO (this->name, - gfdb_brick_info->_query_cbk_args, out); + GF_FREE(ipc_ctr_params); - GF_VALIDATE_OR_GOTO (this->name, local_brick, out); + return ret; +} - GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out); +/* This is the call back function for each brick from 
hot/cold bricklist + * It picks up each bricks db and queries for eligible files for migration. + * The list of eligible files are populated in appropriate query files*/ +static int +tier_process_brick(tier_brick_list_t *local_brick, void *args) +{ + int ret = -1; + dict_t *ctr_ipc_in_dict = NULL; + dict_t *ctr_ipc_out_dict = NULL; + char *strval = NULL; - GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out); + GF_VALIDATE_OR_GOTO("tier", local_brick, out); + GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); + if (dht_tier_db_type == GFDB_SQLITE3) { /*Preparing ctr_ipc_in_dict*/ - ctr_ipc_in_dict = dict_new (); + ctr_ipc_in_dict = dict_new(); if (!ctr_ipc_in_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_in_dict cannot initialized"); - goto out; - } - - ipc_ctr_params = GF_CALLOC (1, sizeof (gfdb_ipc_ctr_params_t), - gf_tier_mt_ipc_ctr_params_t); - if (!ipc_ctr_params) { - goto out; + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "ctr_ipc_in_dict cannot initialized"); + goto out; } - /* set all the query params*/ - ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; - - ipc_ctr_params->write_freq_threshold = - query_cbk_args->defrag->write_freq_threshold; - - ipc_ctr_params->read_freq_threshold = - query_cbk_args->defrag->read_freq_threshold; - - ipc_ctr_params->query_limit = - query_cbk_args->defrag->tier_conf.query_limit; - - ipc_ctr_params->emergency_demote = - (!gfdb_brick_info->_gfdb_promote && - query_cbk_args->defrag->tier_conf.watermark_last == - TIER_WM_HI); - - memcpy (&ipc_ctr_params->time_stamp, - gfdb_brick_info->time_stamp, - sizeof (gfdb_time_t)); - - SET_DB_PARAM_TO_DICT (this->name, ctr_ipc_in_dict, - GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_QUERY_OPS, - ret, out); - - - SET_DB_PARAM_TO_DICT (this->name, ctr_ipc_in_dict, - GFDB_IPC_CTR_GET_QFILE_PATH, - local_brick->qfile_path, - ret, out); - - ret = dict_set_bin (ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, - ipc_ctr_params, sizeof (*ipc_ctr_params)); + ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, + GFDB_IPC_CTR_GET_DB_PARAM_OPS); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, - "Failed setting %s to params dictionary", - GFDB_IPC_CTR_GET_QUERY_PARAMS); - GF_FREE (ipc_ctr_params); - goto out; + gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, + "Failed to set %s " + "to params dictionary", + GFDB_IPC_CTR_KEY); + goto out; } - ipc_ctr_params = NULL; - ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, - ctr_ipc_in_dict, &ctr_ipc_out_dict); + ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, ""); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_IPC_TIER_ERROR, "Failed query on %s ret %d", - local_brick->brick_db_path, ret); - goto out; + gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, + "Failed to set %s " + "to params dictionary", + GFDB_IPC_CTR_GET_DB_PARAM_OPS); + goto out; } - ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, - &count); + ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY, + "journal_mode"); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Failed getting count " - "of records on %s", - local_brick->brick_db_path); - goto out; + gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, + "Failed to set %s " + "to params dictionary", + GFDB_IPC_CTR_GET_DB_KEY); + goto out; } - if (count < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Failed query on %s", - 
local_brick->brick_db_path); - ret = -1; - goto out; - } - - pthread_mutex_lock (&dm_stat_mutex); - query_cbk_args->defrag->num_files_lookedup = count; - pthread_mutex_unlock (&dm_stat_mutex); - - ret = 0; -out: - - if (ctr_ipc_in_dict) { - dict_unref(ctr_ipc_in_dict); - ctr_ipc_in_dict = NULL; + ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, + ctr_ipc_in_dict, &ctr_ipc_out_dict); + if (ret || ctr_ipc_out_dict == NULL) { + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to get " + "journal_mode of sql db %s", + local_brick->brick_db_path); + goto out; } - if (ctr_ipc_out_dict) { - dict_unref(ctr_ipc_out_dict); - ctr_ipc_out_dict = NULL; + ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval); + if (ret) { + gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED, + "Failed to get %s " + "from params dictionary" + "journal_mode", + strval); + goto out; } - GF_FREE (ipc_ctr_params); - - return ret; -} - - - - -/* This is the call back function for each brick from hot/cold bricklist - * It picks up each bricks db and queries for eligible files for migration. - * The list of eligible files are populated in appropriate query files*/ -static int -tier_process_brick (tier_brick_list_t *local_brick, void *args) { - int ret = -1; - dict_t *ctr_ipc_in_dict = NULL; - dict_t *ctr_ipc_out_dict = NULL; - char *strval = NULL; - - GF_VALIDATE_OR_GOTO ("tier", local_brick, out); - - GF_VALIDATE_OR_GOTO ("tier", local_brick->xlator, out); - - if (dht_tier_db_type == GFDB_SQLITE3) { - - /*Preparing ctr_ipc_in_dict*/ - ctr_ipc_in_dict = dict_new (); - if (!ctr_ipc_in_dict) { - gf_msg ("tier", GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_in_dict cannot initialized"); - goto out; - } - - ret = dict_set_str (ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, - GFDB_IPC_CTR_GET_DB_PARAM_OPS); - if (ret) { - gf_msg ("tier", GF_LOG_ERROR, 0,\ - LG_MSG_SET_PARAM_FAILED, "Failed to set %s " - "to params dictionary", GFDB_IPC_CTR_KEY); - goto out; - } - - ret = dict_set_str (ctr_ipc_in_dict, - GFDB_IPC_CTR_GET_DB_PARAM_OPS, ""); - if (ret) { - gf_msg ("tier", GF_LOG_ERROR, 0,\ - LG_MSG_SET_PARAM_FAILED, "Failed to set %s " - "to params dictionary", - GFDB_IPC_CTR_GET_DB_PARAM_OPS); - goto out; - } - - ret = dict_set_str (ctr_ipc_in_dict, - GFDB_IPC_CTR_GET_DB_KEY, "journal_mode"); - if (ret) { - gf_msg ("tier", GF_LOG_ERROR, 0, - LG_MSG_SET_PARAM_FAILED, "Failed to set %s " - "to params dictionary", - GFDB_IPC_CTR_GET_DB_KEY); - goto out; - } - - - - ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, - ctr_ipc_in_dict, &ctr_ipc_out_dict); - if (ret || ctr_ipc_out_dict == NULL) { - gf_msg ("tier", GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Failed to get " - "journal_mode of sql db %s", - local_brick->brick_db_path); - goto out; - } - - ret = dict_get_str (ctr_ipc_out_dict, "journal_mode", &strval); - if (ret) { - gf_msg ("tier", GF_LOG_ERROR, 0, - LG_MSG_GET_PARAM_FAILED, "Failed to get %s " - "from params dictionary" - "journal_mode", strval); - goto out; - } - - if (strval && (strncmp(strval, "wal", SLEN ("wal")) == 0)) { - ret = tier_process_self_query (local_brick, args); - if (ret) { - goto out; - } - } else { - ret = tier_process_ctr_query (local_brick, args); - if (ret) { - goto out; - } - } - ret = 0; - + if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) { + ret = tier_process_self_query(local_brick, args); + if (ret) { + goto out; + } } else { - ret = tier_process_self_query (local_brick, args); - if (ret) { - goto out; - } - } - - ret = 0; -out: - if 
(ctr_ipc_in_dict) - dict_unref (ctr_ipc_in_dict); - - if (ctr_ipc_out_dict) - dict_unref (ctr_ipc_out_dict); - - return ret; -} - - - - -static int -tier_build_migration_qfile (migration_args_t *args, - query_cbk_args_t *query_cbk_args, - gf_boolean_t is_promotion) -{ - gfdb_time_t current_time; - gfdb_brick_info_t gfdb_brick_info; - gfdb_time_t time_in_past; - int ret = -1; - tier_brick_list_t *local_brick = NULL; - int i = 0; - time_in_past.tv_sec = args->freq_time; - time_in_past.tv_usec = 0; - - ret = gettimeofday (¤t_time, NULL); - if (ret == -1) { - gf_msg (args->this->name, GF_LOG_ERROR, errno, - DHT_MSG_SYS_CALL_GET_TIME_FAILED, - "Failed to get current time"); + ret = tier_process_ctr_query(local_brick, args); + if (ret) { goto out; - } - time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; - - /* The migration daemon may run a varying numberof usec after the */ - /* sleep call triggers. A file may be registered in CTR some number */ - /* of usec X after the daemon started and missed in the subsequent */ - /* cycle if the daemon starts Y usec after the period in seconds */ - /* where Y>X. Normalize away this problem by always setting usec */ - /* to 0. */ - time_in_past.tv_usec = 0; - - gfdb_brick_info.time_stamp = &time_in_past; - gfdb_brick_info._gfdb_promote = is_promotion; - gfdb_brick_info._query_cbk_args = query_cbk_args; - - list_for_each_entry (local_brick, args->brick_list, list) { - - /* Construct query file path for this brick - * i.e - * /var/run/gluster/xlator_name/ - * {promote/demote}-brickname-indexinbricklist - * So that no two query files will have same path even - * bricks have the same name - * */ - snprintf (local_brick->qfile_path, PATH_MAX , "%s-%s-%d", - GET_QFILE_PATH (gfdb_brick_info._gfdb_promote), - local_brick->brick_name, i); - - /* Delete any old query files for this brick */ - sys_unlink (local_brick->qfile_path); - - ret = tier_process_brick (local_brick, - &gfdb_brick_info); - if (ret) { - gf_msg (args->this->name, GF_LOG_ERROR, 0, - DHT_MSG_BRICK_QUERY_FAILED, - "Brick %s query failed\n", - local_brick->brick_db_path); - } - i++; + } } ret = 0; -out: - return ret; -} -static int -tier_migrate_files_using_qfile (migration_args_t *comp, - query_cbk_args_t *query_cbk_args) -{ - int ret = -1; - tier_brick_list_t *local_brick = NULL; - tier_brick_list_t *temp = NULL; - gfdb_time_t current_time = {0,}; - ssize_t qfile_array_size = 0; - int count = 0; - int temp_fd = 0; - gf_tier_conf_t *tier_conf = NULL; - - tier_conf = &(query_cbk_args->defrag->tier_conf); - - /* Time for error query files */ - gettimeofday (¤t_time, NULL); - - /* Build the qfile list */ - list_for_each_entry_safe (local_brick, temp, comp->brick_list, list) { - qfile_array_size++; - } - query_cbk_args->qfile_array = qfile_array_new (qfile_array_size); - if (!query_cbk_args->qfile_array) { - gf_msg ("tier", GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Failed to create new " - "qfile_array"); - goto out; - } - - /*Open all qfiles*/ - count = 0; - query_cbk_args->qfile_array->exhausted_count = 0; - list_for_each_entry_safe (local_brick, temp, comp->brick_list, list) { - temp_fd = query_cbk_args->qfile_array->fd_array[count]; - temp_fd = open (local_brick->qfile_path, O_RDONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (temp_fd < 0) { - gf_msg ("tier", GF_LOG_ERROR, errno, - DHT_MSG_LOG_TIER_ERROR, "Failed to open " - "%s to the query file", - local_brick->qfile_path); - query_cbk_args->qfile_array->exhausted_count++; - } - query_cbk_args->qfile_array->fd_array[count] = 
temp_fd; - count++; - } - - /* Moving the query file index to the next, so that we won't the same - * query file every cycle as the first one */ - query_cbk_args->qfile_array->next_index = - (query_cbk_args->is_promotion) ? - tier_conf->last_promote_qfile_index : - tier_conf->last_demote_qfile_index; - shift_next_index (query_cbk_args->qfile_array); - if (query_cbk_args->is_promotion) { - tier_conf->last_promote_qfile_index = - query_cbk_args->qfile_array->next_index; - } else { - tier_conf->last_demote_qfile_index = - query_cbk_args->qfile_array->next_index; - } - - /* Migrate files using query file list */ - ret = tier_migrate_using_query_file ((void *)query_cbk_args); -out: - qfile_array_free (query_cbk_args->qfile_array); - - /* If there is an error rename all the query files to .err files - * with a timestamp for better debugging */ + } else { + ret = tier_process_self_query(local_brick, args); if (ret) { - struct tm tm = {0,}; - char time_str[128] = {0,}; - char query_file_path_err[PATH_MAX] = {0,}; - int32_t len = 0; - - /* Time format for error query files */ - gmtime_r (¤t_time.tv_sec, &tm); - strftime (time_str, sizeof (time_str), "%F-%T", &tm); - - list_for_each_entry_safe (local_brick, temp, comp->brick_list, - list) { - /* rename error qfile*/ - len = snprintf (query_file_path_err, - sizeof (query_file_path_err), - "%s-%s.err", local_brick->qfile_path, - time_str); - if ((len >= 0) && - (len < sizeof(query_file_path_err))) { - if (sys_rename (local_brick->qfile_path, - query_file_path_err) == -1) - gf_msg_debug ("tier", 0, "rename " - "failed"); - } - } + goto out; } + } - query_cbk_args->qfile_array = NULL; - - return ret; -} - - - -int -tier_demote (migration_args_t *demotion_args) -{ - query_cbk_args_t query_cbk_args; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("tier", demotion_args, out); - GF_VALIDATE_OR_GOTO ("tier", demotion_args->this, out); - GF_VALIDATE_OR_GOTO (demotion_args->this->name, - demotion_args->brick_list, out); - GF_VALIDATE_OR_GOTO (demotion_args->this->name, - demotion_args->defrag, out); - - THIS = demotion_args->this; - - query_cbk_args.this = demotion_args->this; - query_cbk_args.defrag = demotion_args->defrag; - query_cbk_args.is_promotion = 0; - - /*Build the query file using bricklist*/ - ret = tier_build_migration_qfile (demotion_args, &query_cbk_args, - _gf_false); - if (ret) - goto out; - - /* Migrate files using the query file */ - ret = tier_migrate_files_using_qfile (demotion_args, - &query_cbk_args); - if (ret) - goto out; - + ret = 0; out: - demotion_args->return_value = ret; - return ret; -} - - -int -tier_promote (migration_args_t *promotion_args) -{ - int ret = -1; - query_cbk_args_t query_cbk_args; - - GF_VALIDATE_OR_GOTO ("tier", promotion_args->this, out); - GF_VALIDATE_OR_GOTO (promotion_args->this->name, - promotion_args->brick_list, out); - GF_VALIDATE_OR_GOTO (promotion_args->this->name, - promotion_args->defrag, out); + if (ctr_ipc_in_dict) + dict_unref(ctr_ipc_in_dict); - THIS = promotion_args->this; + if (ctr_ipc_out_dict) + dict_unref(ctr_ipc_out_dict); - query_cbk_args.this = promotion_args->this; - query_cbk_args.defrag = promotion_args->defrag; - query_cbk_args.is_promotion = 1; - - /*Build the query file using bricklist*/ - ret = tier_build_migration_qfile (promotion_args, &query_cbk_args, - _gf_true); - if (ret) - goto out; - - /* Migrate files using the query file */ - ret = tier_migrate_files_using_qfile (promotion_args, &query_cbk_args); - if (ret) - goto out; - -out: - promotion_args->return_value = ret; - return ret; + 
return ret; } - -/* - * Command the CTR on a brick to compact the local database using an IPC - */ static int -tier_process_self_compact (tier_brick_list_t *local_brick, void *args) +tier_build_migration_qfile(migration_args_t *args, + query_cbk_args_t *query_cbk_args, + gf_boolean_t is_promotion) { - int ret = -1; - char *db_path = NULL; - query_cbk_args_t *query_cbk_args = NULL; - xlator_t *this = NULL; - gfdb_conn_node_t *conn_node = NULL; - dict_t *params_dict = NULL; - dict_t *ctr_ipc_dict = NULL; - gfdb_brick_info_t *gfdb_brick_info = args; - - /*Init of all the essentials*/ - GF_VALIDATE_OR_GOTO ("tier", gfdb_brick_info , out); - query_cbk_args = gfdb_brick_info->_query_cbk_args; - - GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out); - this = query_cbk_args->this; - - GF_VALIDATE_OR_GOTO (this->name, - gfdb_brick_info->_query_cbk_args, out); + gfdb_time_t current_time; + gfdb_brick_info_t gfdb_brick_info; + gfdb_time_t time_in_past; + int ret = -1; + tier_brick_list_t *local_brick = NULL; + int i = 0; + time_in_past.tv_sec = args->freq_time; + time_in_past.tv_usec = 0; + + ret = gettimeofday(¤t_time, NULL); + if (ret == -1) { + gf_msg(args->this->name, GF_LOG_ERROR, errno, + DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); + goto out; + } + time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; + + /* The migration daemon may run a varying numberof usec after the */ + /* sleep call triggers. A file may be registered in CTR some number */ + /* of usec X after the daemon started and missed in the subsequent */ + /* cycle if the daemon starts Y usec after the period in seconds */ + /* where Y>X. Normalize away this problem by always setting usec */ + /* to 0. */ + time_in_past.tv_usec = 0; + + gfdb_brick_info.time_stamp = &time_in_past; + gfdb_brick_info._gfdb_promote = is_promotion; + gfdb_brick_info._query_cbk_args = query_cbk_args; + + list_for_each_entry(local_brick, args->brick_list, list) + { + /* Construct query file path for this brick + * i.e + * /var/run/gluster/xlator_name/ + * {promote/demote}-brickname-indexinbricklist + * So that no two query files will have same path even + * bricks have the same name + * */ + snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", + GET_QFILE_PATH(gfdb_brick_info._gfdb_promote), + local_brick->brick_name, i); - GF_VALIDATE_OR_GOTO (this->name, local_brick, out); + /* Delete any old query files for this brick */ + sys_unlink(local_brick->qfile_path); - GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out); + ret = tier_process_brick(local_brick, &gfdb_brick_info); + if (ret) { + gf_msg(args->this->name, GF_LOG_ERROR, 0, + DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n", + local_brick->brick_db_path); + } + i++; + } + ret = 0; +out: + return ret; +} - GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out); +static int +tier_migrate_files_using_qfile(migration_args_t *comp, + query_cbk_args_t *query_cbk_args) +{ + int ret = -1; + tier_brick_list_t *local_brick = NULL; + tier_brick_list_t *temp = NULL; + gfdb_time_t current_time = { + 0, + }; + ssize_t qfile_array_size = 0; + int count = 0; + int temp_fd = 0; + gf_tier_conf_t *tier_conf = NULL; + + tier_conf = &(query_cbk_args->defrag->tier_conf); + + /* Time for error query files */ + gettimeofday(¤t_time, NULL); + + /* Build the qfile list */ + list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) + { + qfile_array_size++; + } + query_cbk_args->qfile_array = qfile_array_new(qfile_array_size); + if 
(!query_cbk_args->qfile_array) { + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to create new " + "qfile_array"); + goto out; + } + + /*Open all qfiles*/ + count = 0; + query_cbk_args->qfile_array->exhausted_count = 0; + list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) + { + temp_fd = query_cbk_args->qfile_array->fd_array[count]; + temp_fd = open(local_brick->qfile_path, O_RDONLY, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (temp_fd < 0) { + gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, + "Failed to open " + "%s to the query file", + local_brick->qfile_path); + query_cbk_args->qfile_array->exhausted_count++; + } + query_cbk_args->qfile_array->fd_array[count] = temp_fd; + count++; + } + + /* Moving the query file index to the next, so that we won't the same + * query file every cycle as the first one */ + query_cbk_args->qfile_array + ->next_index = (query_cbk_args->is_promotion) + ? tier_conf->last_promote_qfile_index + : tier_conf->last_demote_qfile_index; + shift_next_index(query_cbk_args->qfile_array); + if (query_cbk_args->is_promotion) { + tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array + ->next_index; + } else { + tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array + ->next_index; + } + + /* Migrate files using query file list */ + ret = tier_migrate_using_query_file((void *)query_cbk_args); +out: + qfile_array_free(query_cbk_args->qfile_array); + + /* If there is an error rename all the query files to .err files + * with a timestamp for better debugging */ + if (ret) { + struct tm tm = { + 0, + }; + char time_str[128] = { + 0, + }; + char query_file_path_err[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + /* Time format for error query files */ + gmtime_r(¤t_time.tv_sec, &tm); + strftime(time_str, sizeof(time_str), "%F-%T", &tm); + + list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) + { + /* rename error qfile*/ + len = snprintf(query_file_path_err, sizeof(query_file_path_err), + "%s-%s.err", local_brick->qfile_path, time_str); + if ((len >= 0) && (len < sizeof(query_file_path_err))) { + if (sys_rename(local_brick->qfile_path, query_file_path_err) == + -1) + gf_msg_debug("tier", 0, + "rename " + "failed"); + } + } + } + + query_cbk_args->qfile_array = NULL; + + return ret; +} - db_path = local_brick->brick_db_path; +int +tier_demote(migration_args_t *demotion_args) +{ + query_cbk_args_t query_cbk_args; + int ret = -1; - /*Preparing DB parameters before init_db i.e getting db connection*/ - params_dict = dict_new (); - if (!params_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "DB Params cannot initialized"); - goto out; - } - SET_DB_PARAM_TO_DICT (this->name, params_dict, - (char *) gfdb_methods.get_db_path_key(), db_path, - ret, out); - - /*Get the db connection*/ - conn_node = gfdb_methods.init_db ((void *)params_dict, - dht_tier_db_type); - if (!conn_node) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "FATAL: Failed initializing db operations"); - goto out; - } + GF_VALIDATE_OR_GOTO("tier", demotion_args, out); + GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out); + GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list, + out); + GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out); - ret = 0; + THIS = demotion_args->this; - /*Preparing ctr_ipc_dict*/ - ctr_ipc_dict = dict_new (); - if (!ctr_ipc_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_dict 
cannot initialized"); - goto out; - } + query_cbk_args.this = demotion_args->this; + query_cbk_args.defrag = demotion_args->defrag; + query_cbk_args.is_promotion = 0; - ret = dict_set_int32 (ctr_ipc_dict, "compact_active", - query_cbk_args->defrag-> - tier_conf.compact_active); + /*Build the query file using bricklist*/ + ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false); + if (ret) + goto out; - if (ret) { - gf_msg ("tier", GF_LOG_ERROR, 0, - LG_MSG_SET_PARAM_FAILED, "Failed to set %s " - "to params dictionary", - "compact_active"); - goto out; - } + /* Migrate files using the query file */ + ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args); + if (ret) + goto out; - ret = dict_set_int32 (ctr_ipc_dict, "compact_mode_switched", - query_cbk_args->defrag-> - tier_conf.compact_mode_switched); +out: + demotion_args->return_value = ret; + return ret; +} - if (ret) { - gf_msg ("tier", GF_LOG_ERROR, 0, - LG_MSG_SET_PARAM_FAILED, "Failed to set %s " - "to params dictionary", - "compact_mode_switched"); - goto out; - } +int +tier_promote(migration_args_t *promotion_args) +{ + int ret = -1; + query_cbk_args_t query_cbk_args; - SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, - GFDB_IPC_CTR_KEY, GFDB_IPC_CTR_SET_COMPACT_PRAGMA, - ret, out); + GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out); + GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list, + out); + GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag, + out); - gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "Starting Compaction IPC"); + THIS = promotion_args->this; - ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, - NULL); + query_cbk_args.this = promotion_args->this; + query_cbk_args.defrag = promotion_args->defrag; + query_cbk_args.is_promotion = 1; - gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "Ending Compaction IPC"); + /*Build the query file using bricklist*/ + ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true); + if (ret) + goto out; - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Failed compaction " - "on db %s error %d", local_brick->brick_db_path, ret); - goto out; - } + /* Migrate files using the query file */ + ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args); + if (ret) + goto out; - gf_msg (this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "SUCCESS: %s Compaction", local_brick->brick_name); +out: + promotion_args->return_value = ret; + return ret; +} - ret = 0; +/* + * Command the CTR on a brick to compact the local database using an IPC + */ +static int +tier_process_self_compact(tier_brick_list_t *local_brick, void *args) +{ + int ret = -1; + char *db_path = NULL; + query_cbk_args_t *query_cbk_args = NULL; + xlator_t *this = NULL; + gfdb_conn_node_t *conn_node = NULL; + dict_t *params_dict = NULL; + dict_t *ctr_ipc_dict = NULL; + gfdb_brick_info_t *gfdb_brick_info = args; + + /*Init of all the essentials*/ + GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); + query_cbk_args = gfdb_brick_info->_query_cbk_args; + + GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); + this = query_cbk_args->this; + + GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); + + GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); + + db_path = local_brick->brick_db_path; 
+ + /*Preparing DB parameters before init_db i.e getting db connection*/ + params_dict = dict_new(); + if (!params_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "DB Params cannot initialized"); + goto out; + } + SET_DB_PARAM_TO_DICT(this->name, params_dict, + (char *)gfdb_methods.get_db_path_key(), db_path, ret, + out); + + /*Get the db connection*/ + conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); + if (!conn_node) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "FATAL: Failed initializing db operations"); + goto out; + } + + ret = 0; + + /*Preparing ctr_ipc_dict*/ + ctr_ipc_dict = dict_new(); + if (!ctr_ipc_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "ctr_ipc_dict cannot initialized"); + goto out; + } + + ret = dict_set_int32(ctr_ipc_dict, "compact_active", + query_cbk_args->defrag->tier_conf.compact_active); + + if (ret) { + gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, + "Failed to set %s " + "to params dictionary", + "compact_active"); + goto out; + } + + ret = dict_set_int32( + ctr_ipc_dict, "compact_mode_switched", + query_cbk_args->defrag->tier_conf.compact_mode_switched); + + if (ret) { + gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, + "Failed to set %s " + "to params dictionary", + "compact_mode_switched"); + goto out; + } + + SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, + GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out); + + gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, + "Starting Compaction IPC"); + + ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, + NULL); + + gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, + "Ending Compaction IPC"); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed compaction " + "on db %s error %d", + local_brick->brick_db_path, ret); + goto out; + } + + gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, + "SUCCESS: %s Compaction", local_brick->brick_name); + + ret = 0; out: - if (params_dict) { - dict_unref (params_dict); - params_dict = NULL; - } + if (params_dict) { + dict_unref(params_dict); + params_dict = NULL; + } - if (ctr_ipc_dict) { - dict_unref (ctr_ipc_dict); - ctr_ipc_dict = NULL; - } + if (ctr_ipc_dict) { + dict_unref(ctr_ipc_dict); + ctr_ipc_dict = NULL; + } - gfdb_methods.fini_db (conn_node); + gfdb_methods.fini_db(conn_node); - return ret; + return ret; } /* @@ -1995,403 +1856,366 @@ out: * function to prepare the compaction IPC. 
*/ static int -tier_compact_db_brick (tier_brick_list_t *local_brick, void *args) +tier_compact_db_brick(tier_brick_list_t *local_brick, void *args) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("tier", local_brick, out); + GF_VALIDATE_OR_GOTO("tier", local_brick, out); - GF_VALIDATE_OR_GOTO ("tier", local_brick->xlator, out); + GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); - ret = tier_process_self_compact (local_brick, args); - if (ret) { - gf_msg ("tier", GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Brick %s did not compact", - local_brick->brick_name); - goto out; - } + ret = tier_process_self_compact(local_brick, args); + if (ret) { + gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Brick %s did not compact", local_brick->brick_name); + goto out; + } - ret = 0; + ret = 0; out: - return ret; + return ret; } static int -tier_send_compact (migration_args_t *args, - query_cbk_args_t *query_cbk_args) +tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args) { - gfdb_time_t current_time; - gfdb_brick_info_t gfdb_brick_info; - gfdb_time_t time_in_past; - int ret = -1; - tier_brick_list_t *local_brick = NULL; - - time_in_past.tv_sec = args->freq_time; - time_in_past.tv_usec = 0; - - ret = gettimeofday (¤t_time, NULL); - if (ret == -1) { - gf_msg (args->this->name, GF_LOG_ERROR, errno, - DHT_MSG_SYS_CALL_GET_TIME_FAILED, - "Failed to get current time"); - goto out; + gfdb_time_t current_time; + gfdb_brick_info_t gfdb_brick_info; + gfdb_time_t time_in_past; + int ret = -1; + tier_brick_list_t *local_brick = NULL; + + time_in_past.tv_sec = args->freq_time; + time_in_past.tv_usec = 0; + + ret = gettimeofday(¤t_time, NULL); + if (ret == -1) { + gf_msg(args->this->name, GF_LOG_ERROR, errno, + DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); + goto out; + } + time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; + + /* The migration daemon may run a varying numberof usec after the sleep + call triggers. A file may be registered in CTR some number of usec X + after the daemon started and missed in the subsequent cycle if the + daemon starts Y usec after the period in seconds where Y>X. Normalize + away this problem by always setting usec to 0. */ + time_in_past.tv_usec = 0; + + gfdb_brick_info.time_stamp = &time_in_past; + + /* This is meant to say we are always compacting at this point */ + /* We simply borrow the promotion flag to do this */ + gfdb_brick_info._gfdb_promote = 1; + + gfdb_brick_info._query_cbk_args = query_cbk_args; + + list_for_each_entry(local_brick, args->brick_list, list) + { + gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, + "Start compaction for %s", local_brick->brick_name); + + ret = tier_compact_db_brick(local_brick, &gfdb_brick_info); + if (ret) { + gf_msg(args->this->name, GF_LOG_ERROR, 0, + DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n", + local_brick->brick_db_path); } - time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; - /* The migration daemon may run a varying numberof usec after the sleep - call triggers. A file may be registered in CTR some number of usec X - after the daemon started and missed in the subsequent cycle if the - daemon starts Y usec after the period in seconds where Y>X. Normalize - away this problem by always setting usec to 0. 
*/ - time_in_past.tv_usec = 0; - - gfdb_brick_info.time_stamp = &time_in_past; - - /* This is meant to say we are always compacting at this point */ - /* We simply borrow the promotion flag to do this */ - gfdb_brick_info._gfdb_promote = 1; + gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, + "End compaction for %s", local_brick->brick_name); + } + ret = 0; +out: + return ret; +} - gfdb_brick_info._query_cbk_args = query_cbk_args; +static int +tier_compact(void *args) +{ + int ret = -1; + query_cbk_args_t query_cbk_args; + migration_args_t *compaction_args = args; + + GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out); + GF_VALIDATE_OR_GOTO(compaction_args->this->name, + compaction_args->brick_list, out); + GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag, + out); + + THIS = compaction_args->this; + + query_cbk_args.this = compaction_args->this; + query_cbk_args.defrag = compaction_args->defrag; + query_cbk_args.is_compaction = 1; + + /* Send the compaction pragma out to all the bricks on the bricklist. */ + /* tier_get_bricklist ensures all bricks on the list are local to */ + /* this node. */ + ret = tier_send_compact(compaction_args, &query_cbk_args); + if (ret) + goto out; + + ret = 0; +out: + compaction_args->return_value = ret; + return ret; +} - list_for_each_entry (local_brick, args->brick_list, list) { +static int +tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head) +{ + xlator_list_t *child = NULL; + char *rv = NULL; + char *rh = NULL; + char *brickname = NULL; + char db_name[PATH_MAX] = ""; + int ret = 0; + tier_brick_list_t *local_brick = NULL; + int32_t len = 0; + + GF_VALIDATE_OR_GOTO("tier", xl, out); + GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out); + + /* + * This function obtains remote subvolumes and filters out only + * those running on the same node as the tier daemon. 
+ */ + if (strcmp(xl->type, "protocol/client") == 0) { + ret = dict_get_str(xl->options, "remote-host", &rh); + if (ret < 0) + goto out; - gf_msg (args->this->name, GF_LOG_TRACE, 0, - DHT_MSG_LOG_TIER_STATUS, - "Start compaction for %s", - local_brick->brick_name); + if (gf_is_local_addr(rh)) { + local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t), + gf_tier_mt_bricklist_t); + if (!local_brick) { + goto out; + } - ret = tier_compact_db_brick (local_brick, - &gfdb_brick_info); - if (ret) { - gf_msg (args->this->name, GF_LOG_ERROR, 0, - DHT_MSG_BRICK_QUERY_FAILED, - "Brick %s compaction failed\n", - local_brick->brick_db_path); - } + ret = dict_get_str(xl->options, "remote-subvolume", &rv); + if (ret < 0) + goto out; - gf_msg (args->this->name, GF_LOG_TRACE, 0, - DHT_MSG_LOG_TIER_STATUS, - "End compaction for %s", - local_brick->brick_name); + brickname = strrchr(rv, '/') + 1; + snprintf(db_name, sizeof(db_name), "%s.db", brickname); - } - ret = 0; -out: - return ret; -} + local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char); + if (!local_brick->brick_db_path) { + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, + "Failed to allocate memory for" + " bricklist."); + ret = -1; + goto out; + } -static int -tier_compact (void *args) -{ - int ret = -1; - query_cbk_args_t query_cbk_args; - migration_args_t *compaction_args = args; - - GF_VALIDATE_OR_GOTO ("tier", compaction_args->this, out); - GF_VALIDATE_OR_GOTO (compaction_args->this->name, - compaction_args->brick_list, out); - GF_VALIDATE_OR_GOTO (compaction_args->this->name, - compaction_args->defrag, out); - - THIS = compaction_args->this; - - query_cbk_args.this = compaction_args->this; - query_cbk_args.defrag = compaction_args->defrag; - query_cbk_args.is_compaction = 1; - - /* Send the compaction pragma out to all the bricks on the bricklist. */ - /* tier_get_bricklist ensures all bricks on the list are local to */ - /* this node. */ - ret = tier_send_compact (compaction_args, &query_cbk_args); - if (ret) + len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv, + GF_HIDDEN_PATH, db_name); + if ((len < 0) || (len >= PATH_MAX)) { + gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS, + "DB path too long"); + ret = -1; goto out; + } - ret = 0; -out: - compaction_args->return_value = ret; - return ret; - } + local_brick->xlator = xl; -static int -tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head) -{ - xlator_list_t *child = NULL; - char *rv = NULL; - char *rh = NULL; - char *brickname = NULL; - char db_name[PATH_MAX] = ""; - int ret = 0; - tier_brick_list_t *local_brick = NULL; - int32_t len = 0; - - GF_VALIDATE_OR_GOTO ("tier", xl, out); - GF_VALIDATE_OR_GOTO ("tier", local_bricklist_head, out); + snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname); - /* - * This function obtains remote subvolumes and filters out only - * those running on the same node as the tier daemon. 
- */ - if (strcmp(xl->type, "protocol/client") == 0) { - ret = dict_get_str (xl->options, "remote-host", &rh); - if (ret < 0) - goto out; - - if (gf_is_local_addr (rh)) { - - local_brick = GF_CALLOC (1, sizeof(tier_brick_list_t), - gf_tier_mt_bricklist_t); - if (!local_brick) { - goto out; - } - - ret = dict_get_str (xl->options, "remote-subvolume", - &rv); - if (ret < 0) - goto out; - - brickname = strrchr(rv, '/') + 1; - snprintf(db_name, sizeof(db_name), "%s.db", - brickname); - - local_brick->brick_db_path = - GF_MALLOC (PATH_MAX, gf_common_mt_char); - if (!local_brick->brick_db_path) { - gf_msg ("tier", GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_STATUS, - "Failed to allocate memory for" - " bricklist."); - ret = -1; - goto out; - } - - len = snprintf(local_brick->brick_db_path, - PATH_MAX, "%s/%s/%s", rv, - GF_HIDDEN_PATH, db_name); - if ((len < 0) || (len >= PATH_MAX)) { - gf_msg ("tier", GF_LOG_ERROR, EINVAL, - DHT_MSG_LOG_TIER_STATUS, - "DB path too long"); - ret = -1; - goto out; - } - - local_brick->xlator = xl; - - snprintf (local_brick->brick_name, - NAME_MAX, "%s", brickname); - - list_add_tail (&(local_brick->list), - local_bricklist_head); - - ret = 0; - goto out; - } + list_add_tail(&(local_brick->list), local_bricklist_head); + + ret = 0; + goto out; } + } - for (child = xl->children; child; child = child->next) { - ret = tier_get_bricklist (child->xlator, local_bricklist_head); - if (ret) { - goto out; - } + for (child = xl->children; child; child = child->next) { + ret = tier_get_bricklist(child->xlator, local_bricklist_head); + if (ret) { + goto out; } + } - ret = 0; + ret = 0; out: - if (ret) { - if (local_brick) { - GF_FREE (local_brick->brick_db_path); - } - GF_FREE (local_brick); + if (ret) { + if (local_brick) { + GF_FREE(local_brick->brick_db_path); } + GF_FREE(local_brick); + } - return ret; + return ret; } int -tier_get_freq_demote (gf_tier_conf_t *tier_conf) +tier_get_freq_demote(gf_tier_conf_t *tier_conf) { - if ((tier_conf->mode == TIER_MODE_WM) && - (tier_conf->watermark_last == TIER_WM_HI)) - return DEFAULT_DEMOTE_DEGRADED; - else - return tier_conf->tier_demote_frequency; + if ((tier_conf->mode == TIER_MODE_WM) && + (tier_conf->watermark_last == TIER_WM_HI)) + return DEFAULT_DEMOTE_DEGRADED; + else + return tier_conf->tier_demote_frequency; } int -tier_get_freq_promote (gf_tier_conf_t *tier_conf) +tier_get_freq_promote(gf_tier_conf_t *tier_conf) { - return tier_conf->tier_promote_frequency; + return tier_conf->tier_promote_frequency; } int -tier_get_freq_compact_hot (gf_tier_conf_t *tier_conf) +tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf) { - return tier_conf->tier_compact_hot_frequency; + return tier_conf->tier_compact_hot_frequency; } int -tier_get_freq_compact_cold (gf_tier_conf_t *tier_conf) +tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf) { - return tier_conf->tier_compact_cold_frequency; + return tier_conf->tier_compact_cold_frequency; } static int -tier_check_demote (gfdb_time_t current_time, int freq) +tier_check_demote(gfdb_time_t current_time, int freq) { - return ((current_time.tv_sec % freq) == 0) ? - _gf_true : _gf_false; + return ((current_time.tv_sec % freq) == 0) ? 
_gf_true : _gf_false; } static gf_boolean_t -tier_check_promote (gf_tier_conf_t *tier_conf, - gfdb_time_t current_time, - int freq) +tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, + int freq) { - if ((tier_conf->mode == TIER_MODE_WM) && - (tier_conf->watermark_last == TIER_WM_HI)) - return _gf_false; + if ((tier_conf->mode == TIER_MODE_WM) && + (tier_conf->watermark_last == TIER_WM_HI)) + return _gf_false; - else - return ((current_time.tv_sec % freq) == 0) ? - _gf_true : _gf_false; + else + return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false; } static gf_boolean_t -tier_check_compact (gf_tier_conf_t *tier_conf, - gfdb_time_t current_time, - int freq_compact) +tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, + int freq_compact) { + if (!(tier_conf->compact_active || tier_conf->compact_mode_switched)) + return _gf_false; - if (!(tier_conf->compact_active || - tier_conf->compact_mode_switched)) - return _gf_false; - - - return ((current_time.tv_sec % freq_compact) == 0) ? - _gf_true : _gf_false; + return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false; } - void -clear_bricklist (struct list_head *brick_list) +clear_bricklist(struct list_head *brick_list) { - tier_brick_list_t *local_brick = NULL; - tier_brick_list_t *temp = NULL; - - if (list_empty(brick_list)) { - return; - } + tier_brick_list_t *local_brick = NULL; + tier_brick_list_t *temp = NULL; - list_for_each_entry_safe (local_brick, temp, brick_list, list) { - list_del (&local_brick->list); - GF_FREE (local_brick->brick_db_path); - GF_FREE (local_brick); - } + if (list_empty(brick_list)) { + return; + } + + list_for_each_entry_safe(local_brick, temp, brick_list, list) + { + list_del(&local_brick->list); + GF_FREE(local_brick->brick_db_path); + GF_FREE(local_brick); + } } - static void -set_brick_list_qpath (struct list_head *brick_list, gf_boolean_t is_cold) +set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold) { - - tier_brick_list_t *local_brick = NULL; - int i = 0; - - GF_VALIDATE_OR_GOTO ("tier", brick_list, out); - - list_for_each_entry (local_brick, brick_list, list) { - - /* Construct query file path for this brick - * i.e - * /var/run/gluster/xlator_name/ - * {promote/demote}-brickname-indexinbricklist - * So that no two query files will have same path even - * bricks have the same name - * */ - snprintf (local_brick->qfile_path, PATH_MAX , "%s-%s-%d", - GET_QFILE_PATH (is_cold), - local_brick->brick_name, i); - i++; - } + tier_brick_list_t *local_brick = NULL; + int i = 0; + + GF_VALIDATE_OR_GOTO("tier", brick_list, out); + + list_for_each_entry(local_brick, brick_list, list) + { + /* Construct query file path for this brick + * i.e + * /var/run/gluster/xlator_name/ + * {promote/demote}-brickname-indexinbricklist + * So that no two query files will have same path even + * bricks have the same name + * */ + snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", + GET_QFILE_PATH(is_cold), local_brick->brick_name, i); + i++; + } out: - return; + return; } static int -tier_prepare_compact (migration_args_t *args, gfdb_time_t current_time) +tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time) { - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - gf_tier_conf_t *tier_conf = NULL; - gf_boolean_t is_hot_tier = args->is_hot_tier; - int freq = 0; - int ret = -1; - const char *tier_type = is_hot_tier ? 
"hot" : "cold"; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + gf_tier_conf_t *tier_conf = NULL; + gf_boolean_t is_hot_tier = args->is_hot_tier; + int freq = 0; + int ret = -1; + const char *tier_type = is_hot_tier ? "hot" : "cold"; - this = args->this; + this = args->this; - conf = this->private; + conf = this->private; - defrag = conf->defrag; + defrag = conf->defrag; - tier_conf = &defrag->tier_conf; + tier_conf = &defrag->tier_conf; - freq = is_hot_tier ? tier_get_freq_compact_hot (tier_conf) : - tier_get_freq_compact_cold (tier_conf); + freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf) + : tier_get_freq_compact_cold(tier_conf); - defrag->tier_conf.compact_mode_switched = is_hot_tier ? - defrag->tier_conf.compact_mode_switched_hot : - defrag->tier_conf.compact_mode_switched_cold; + defrag->tier_conf.compact_mode_switched = + is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot + : defrag->tier_conf.compact_mode_switched_cold; - gf_msg(this->name, GF_LOG_TRACE, 0, - DHT_MSG_LOG_TIER_STATUS, - "Compact mode %i", - defrag->tier_conf.compact_mode_switched); + gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, + "Compact mode %i", defrag->tier_conf.compact_mode_switched); - if (tier_check_compact (tier_conf, current_time, - freq)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Start compaction on %s tier", - tier_type); + if (tier_check_compact(tier_conf, current_time, freq)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Start compaction on %s tier", tier_type); - args->freq_time = freq; - ret = tier_compact (args); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, "Compaction failed on " - "%s tier", tier_type); - goto out; - } + args->freq_time = freq; + ret = tier_compact(args); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Compaction failed on " + "%s tier", + tier_type); + goto out; + } - gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "End compaction on %s tier", tier_type); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "End compaction on %s tier", tier_type); - if (is_hot_tier) { - defrag->tier_conf.compact_mode_switched_hot = - _gf_false; - } else { - defrag->tier_conf.compact_mode_switched_cold = - _gf_false; - } + if (is_hot_tier) { + defrag->tier_conf.compact_mode_switched_hot = _gf_false; + } else { + defrag->tier_conf.compact_mode_switched_cold = _gf_false; } + } out: - return ret; + return ret; } static int tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) { - if (mode == TIER_MODE_WM && wm == TIER_WM_HI) - return WM_INTERVAL_EMERG; + if (mode == TIER_MODE_WM && wm == TIER_WM_HI) + return WM_INTERVAL_EMERG; - return WM_INTERVAL; + return WM_INTERVAL; } /* @@ -2402,941 +2226,863 @@ tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) * 1. Check trigger to migrate data. * 2. Check for state changes (pause, unpause, stop). 
*/ -static void -*tier_run (void *in_args) +static void * +tier_run(void *in_args) { - dht_conf_t *conf = NULL; - gfdb_time_t current_time = { 0 }; - int freq = 0; - int ret = 0; - xlator_t *any = NULL; - xlator_t *xlator = NULL; - gf_tier_conf_t *tier_conf = NULL; - loc_t root_loc = { 0 }; - int check_watermark = 0; - gf_defrag_info_t *defrag = NULL; - xlator_t *this = NULL; - migration_args_t *args = in_args; - GF_VALIDATE_OR_GOTO ("tier", args, out); - GF_VALIDATE_OR_GOTO ("tier", args->brick_list, out); - - this = args->this; - GF_VALIDATE_OR_GOTO ("tier", this, out); - - conf = this->private; - GF_VALIDATE_OR_GOTO ("tier", conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO ("tier", defrag, out); + dht_conf_t *conf = NULL; + gfdb_time_t current_time = {0}; + int freq = 0; + int ret = 0; + xlator_t *any = NULL; + xlator_t *xlator = NULL; + gf_tier_conf_t *tier_conf = NULL; + loc_t root_loc = {0}; + int check_watermark = 0; + gf_defrag_info_t *defrag = NULL; + xlator_t *this = NULL; + migration_args_t *args = in_args; + GF_VALIDATE_OR_GOTO("tier", args, out); + GF_VALIDATE_OR_GOTO("tier", args->brick_list, out); + + this = args->this; + GF_VALIDATE_OR_GOTO("tier", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO("tier", conf, out); + + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO("tier", defrag, out); + + if (list_empty(args->brick_list)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR, + "Brick list for tier is empty. Exiting."); + goto out; + } + + defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; + tier_conf = &defrag->tier_conf; + + dht_build_root_loc(defrag->root_inode, &root_loc); + + while (1) { + /* + * Check if a graph switch occurred. If so, stop migration + * thread. It will need to be restarted manually. + */ + any = THIS->ctx->active->first; + xlator = xlator_search_by_name(any, this->name); - if (list_empty (args->brick_list)) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_ERROR, - "Brick list for tier is empty. Exiting."); - goto out; + if (xlator != this) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Detected graph switch. Exiting migration " + "daemon."); + goto out; } - defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; - tier_conf = &defrag->tier_conf; - - dht_build_root_loc (defrag->root_inode, &root_loc); + gf_defrag_check_pause_tier(tier_conf); - while (1) { + sleep(1); - /* - * Check if a graph switch occurred. If so, stop migration - * thread. It will need to be restarted manually. - */ - any = THIS->ctx->active->first; - xlator = xlator_search_by_name (any, this->name); - - if (xlator != this) { - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Detected graph switch. 
Exiting migration " - "daemon."); - goto out; - } + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = 1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "defrag->defrag_status != " + "GF_DEFRAG_STATUS_STARTED"); + goto out; + } - gf_defrag_check_pause_tier (tier_conf); + if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || + defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { + ret = 0; + defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR, + "defrag->defrag_cmd == " + "GF_DEFRAG_CMD_START_DETACH_TIER"); + goto out; + } - sleep(1); + if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) + continue; - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = 1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "defrag->defrag_status != " - "GF_DEFRAG_STATUS_STARTED"); - goto out; + /* To have proper synchronization amongst all + * brick holding nodes, so that promotion and demotions + * start atomically w.r.t promotion/demotion frequency + * period, all nodes should have their system time + * in-sync with each other either manually set or + * using a NTP server*/ + ret = gettimeofday(¤t_time, NULL); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, + DHT_MSG_SYS_CALL_GET_TIME_FAILED, + "Failed to get current time"); + goto out; + } + + check_watermark++; + + /* emergency demotion requires frequent watermark monitoring */ + if (check_watermark >= + tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) { + check_watermark = 0; + if (tier_conf->mode == TIER_MODE_WM) { + ret = tier_get_fs_stat(this, &root_loc); + if (ret != 0) { + continue; } - - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - ret = 0; - defrag->defrag_status = - GF_DEFRAG_STATUS_COMPLETE; - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_LOG_TIER_ERROR, - "defrag->defrag_cmd == " - "GF_DEFRAG_CMD_START_DETACH_TIER"); - goto out; + ret = tier_check_watermark(this); + if (ret != 0) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark"); + continue; } + } + } - if (gf_defrag_get_pause_state (&defrag->tier_conf) != - TIER_RUNNING) - continue; - - /* To have proper synchronization amongst all - * brick holding nodes, so that promotion and demotions - * start atomically w.r.t promotion/demotion frequency - * period, all nodes should have their system time - * in-sync with each other either manually set or - * using a NTP server*/ - ret = gettimeofday (¤t_time, NULL); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - DHT_MSG_SYS_CALL_GET_TIME_FAILED, - "Failed to get current time"); - goto out; - } + if (args->is_promotion) { + freq = tier_get_freq_promote(tier_conf); - check_watermark++; - - /* emergency demotion requires frequent watermark monitoring */ - if (check_watermark >= - tier_get_wm_interval(tier_conf->mode, - tier_conf->watermark_last)) { - check_watermark = 0; - if (tier_conf->mode == TIER_MODE_WM) { - ret = tier_get_fs_stat (this, &root_loc); - if (ret != 0) { - continue; - } - ret = tier_check_watermark (this); - if (ret != 0) { - gf_msg (this->name, GF_LOG_CRITICAL, errno, - DHT_MSG_LOG_TIER_ERROR, - "Failed to get watermark"); - continue; - } - } + if (tier_check_promote(tier_conf, current_time, freq)) { + args->freq_time = freq; + ret = tier_promote(args); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Promotion failed"); } + } + } 
else if (args->is_compaction) { + tier_prepare_compact(args, current_time); + } else { + freq = tier_get_freq_demote(tier_conf); - if (args->is_promotion) { - freq = tier_get_freq_promote (tier_conf); - - if (tier_check_promote (tier_conf, current_time, freq)) { - args->freq_time = freq; - ret = tier_promote (args); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Promotion failed"); - } - } - } else if (args->is_compaction) { - tier_prepare_compact (args, current_time); - } else { - freq = tier_get_freq_demote (tier_conf); - - if (tier_check_demote (current_time, freq)) { - args->freq_time = freq; - ret = tier_demote (args); - if (ret) { - gf_msg (this->name, - GF_LOG_ERROR, - 0, - DHT_MSG_LOG_TIER_ERROR, - "Demotion failed"); - } - } + if (tier_check_demote(current_time, freq)) { + args->freq_time = freq; + ret = tier_demote(args); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Demotion failed"); } - - /* Check the statfs immediately after the processing threads - return */ - check_watermark = WM_INTERVAL; + } } - ret = 0; + /* Check the statfs immediately after the processing threads + return */ + check_watermark = WM_INTERVAL; + } + + ret = 0; out: - args->return_value = ret; + args->return_value = ret; - return NULL; + return NULL; } int -tier_start (xlator_t *this, gf_defrag_info_t *defrag) +tier_start(xlator_t *this, gf_defrag_info_t *defrag) { - pthread_t promote_thread; - pthread_t demote_thread; - pthread_t hot_compact_thread; - pthread_t cold_compact_thread; - int ret = -1; - struct list_head bricklist_hot = { 0 }; - struct list_head bricklist_cold = { 0 }; - migration_args_t promotion_args = { 0 }; - migration_args_t demotion_args = { 0 }; - migration_args_t hot_compaction_args = { 0 }; - migration_args_t cold_compaction_args = { 0 }; - dht_conf_t *conf = NULL; - - INIT_LIST_HEAD ((&bricklist_hot)); - INIT_LIST_HEAD ((&bricklist_cold)); - - conf = this->private; - - tier_get_bricklist (conf->subvolumes[1], &bricklist_hot); - set_brick_list_qpath (&bricklist_hot, _gf_false); - - demotion_args.this = this; - demotion_args.brick_list = &bricklist_hot; - demotion_args.defrag = defrag; - demotion_args.is_promotion = _gf_false; - demotion_args.is_compaction = _gf_false; - - ret = gf_thread_create (&demote_thread, - NULL, &tier_run, &demotion_args, "tierdem"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to start demotion thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto cleanup; - } - - tier_get_bricklist (conf->subvolumes[0], &bricklist_cold); - set_brick_list_qpath (&bricklist_cold, _gf_true); - - promotion_args.this = this; - promotion_args.brick_list = &bricklist_cold; - promotion_args.defrag = defrag; - promotion_args.is_promotion = _gf_true; - - ret = gf_thread_create (&promote_thread, NULL, &tier_run, - &promotion_args, "tierpro"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to start promotion thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto waitforspawned; - } - - hot_compaction_args.this = this; - hot_compaction_args.brick_list = &bricklist_hot; - hot_compaction_args.defrag = defrag; - hot_compaction_args.is_promotion = _gf_false; - hot_compaction_args.is_compaction = _gf_true; - hot_compaction_args.is_hot_tier = _gf_true; - - ret = gf_thread_create (&hot_compact_thread, NULL, &tier_run, - &hot_compaction_args, "tierhcom"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - 
DHT_MSG_LOG_TIER_ERROR, - "Failed to start compaction thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto waitforspawnedpromote; - } - - cold_compaction_args.this = this; - cold_compaction_args.brick_list = &bricklist_cold; - cold_compaction_args.defrag = defrag; - cold_compaction_args.is_promotion = _gf_false; - cold_compaction_args.is_compaction = _gf_true; - cold_compaction_args.is_hot_tier = _gf_false; - - ret = gf_thread_create (&cold_compact_thread, NULL, &tier_run, - &cold_compaction_args, "tierccom"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Failed to start compaction thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto waitforspawnedhotcompact; - } - pthread_join (cold_compact_thread, NULL); + pthread_t promote_thread; + pthread_t demote_thread; + pthread_t hot_compact_thread; + pthread_t cold_compact_thread; + int ret = -1; + struct list_head bricklist_hot = {0}; + struct list_head bricklist_cold = {0}; + migration_args_t promotion_args = {0}; + migration_args_t demotion_args = {0}; + migration_args_t hot_compaction_args = {0}; + migration_args_t cold_compaction_args = {0}; + dht_conf_t *conf = NULL; + + INIT_LIST_HEAD((&bricklist_hot)); + INIT_LIST_HEAD((&bricklist_cold)); + + conf = this->private; + + tier_get_bricklist(conf->subvolumes[1], &bricklist_hot); + set_brick_list_qpath(&bricklist_hot, _gf_false); + + demotion_args.this = this; + demotion_args.brick_list = &bricklist_hot; + demotion_args.defrag = defrag; + demotion_args.is_promotion = _gf_false; + demotion_args.is_compaction = _gf_false; + + ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args, + "tierdem"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to start demotion thread."); + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + goto cleanup; + } + + tier_get_bricklist(conf->subvolumes[0], &bricklist_cold); + set_brick_list_qpath(&bricklist_cold, _gf_true); + + promotion_args.this = this; + promotion_args.brick_list = &bricklist_cold; + promotion_args.defrag = defrag; + promotion_args.is_promotion = _gf_true; + + ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args, + "tierpro"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to start promotion thread."); + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + goto waitforspawned; + } + + hot_compaction_args.this = this; + hot_compaction_args.brick_list = &bricklist_hot; + hot_compaction_args.defrag = defrag; + hot_compaction_args.is_promotion = _gf_false; + hot_compaction_args.is_compaction = _gf_true; + hot_compaction_args.is_hot_tier = _gf_true; + + ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run, + &hot_compaction_args, "tierhcom"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to start compaction thread."); + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + goto waitforspawnedpromote; + } + + cold_compaction_args.this = this; + cold_compaction_args.brick_list = &bricklist_cold; + cold_compaction_args.defrag = defrag; + cold_compaction_args.is_promotion = _gf_false; + cold_compaction_args.is_compaction = _gf_true; + cold_compaction_args.is_hot_tier = _gf_false; + + ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run, + &cold_compaction_args, "tierccom"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to start compaction thread."); + defrag->defrag_status = 
GF_DEFRAG_STATUS_FAILED; + goto waitforspawnedhotcompact; + } + pthread_join(cold_compact_thread, NULL); waitforspawnedhotcompact: - pthread_join (hot_compact_thread, NULL); + pthread_join(hot_compact_thread, NULL); waitforspawnedpromote: - pthread_join (promote_thread, NULL); + pthread_join(promote_thread, NULL); waitforspawned: - pthread_join (demote_thread, NULL); + pthread_join(demote_thread, NULL); cleanup: - clear_bricklist (&bricklist_cold); - clear_bricklist (&bricklist_hot); - return ret; + clear_bricklist(&bricklist_cold); + clear_bricklist(&bricklist_hot); + return ret; } int32_t -tier_migration_needed (xlator_t *this) +tier_migration_needed(xlator_t *this) { - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; + gf_defrag_info_t *defrag = NULL; + dht_conf_t *conf = NULL; + int ret = 0; - conf = this->private; + conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); - GF_VALIDATE_OR_GOTO (this->name, conf->defrag, out); + GF_VALIDATE_OR_GOTO(this->name, conf, out); + GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out); - defrag = conf->defrag; + defrag = conf->defrag; - if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || - (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; + if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || + (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) + ret = 1; out: - return ret; + return ret; } int32_t -tier_migration_get_dst (xlator_t *this, dht_local_t *local) +tier_migration_get_dst(xlator_t *this, dht_local_t *local) { - dht_conf_t *conf = NULL; - int32_t ret = -1; - gf_defrag_info_t *defrag = NULL; + dht_conf_t *conf = NULL; + int32_t ret = -1; + gf_defrag_info_t *defrag = NULL; - GF_VALIDATE_OR_GOTO ("tier", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + GF_VALIDATE_OR_GOTO("tier", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - conf = this->private; + conf = this->private; - defrag = conf->defrag; + defrag = conf->defrag; - if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) { - local->rebalance.target_node = conf->subvolumes[0]; + if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) { + local->rebalance.target_node = conf->subvolumes[0]; - } else if (conf->subvolumes[0] == local->cached_subvol) - local->rebalance.target_node = - conf->subvolumes[1]; - else - local->rebalance.target_node = - conf->subvolumes[0]; + } else if (conf->subvolumes[0] == local->cached_subvol) + local->rebalance.target_node = conf->subvolumes[1]; + else + local->rebalance.target_node = conf->subvolumes[0]; - if (local->rebalance.target_node) - ret = 0; + if (local->rebalance.target_node) + ret = 0; out: - return ret; + return ret; } xlator_t * -tier_search (xlator_t *this, dht_layout_t *layout, const char *name) +tier_search(xlator_t *this, dht_layout_t *layout, const char *name) { - xlator_t *subvol = NULL; - dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + dht_conf_t *conf = NULL; - GF_VALIDATE_OR_GOTO ("tier", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + GF_VALIDATE_OR_GOTO("tier", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - conf = this->private; + conf = this->private; - subvol = TIER_HASHED_SUBVOL; + subvol = TIER_HASHED_SUBVOL; - out: - return subvol; +out: + return subvol; } - static int -tier_load_externals (xlator_t *this) +tier_load_externals(xlator_t *this) { - int ret = -1; - char *libpathfull = (LIBDIR "/libgfdb.so.0"); - get_gfdb_methods_t get_gfdb_methods; + int ret = -1; + char *libpathfull = 
(LIBDIR "/libgfdb.so.0"); + get_gfdb_methods_t get_gfdb_methods; - GF_VALIDATE_OR_GOTO ("this", this, out); + GF_VALIDATE_OR_GOTO("this", this, out); - libhandle = dlopen (libpathfull, RTLD_NOW); - if (!libhandle) { - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Error loading libgfdb.so %s\n", dlerror()); - ret = -1; - goto out; - } + libhandle = dlopen(libpathfull, RTLD_NOW); + if (!libhandle) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Error loading libgfdb.so %s\n", dlerror()); + ret = -1; + goto out; + } - get_gfdb_methods = dlsym (libhandle, "get_gfdb_methods"); - if (!get_gfdb_methods) { - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Error loading get_gfdb_methods()"); - ret = -1; - goto out; - } + get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods"); + if (!get_gfdb_methods) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Error loading get_gfdb_methods()"); + ret = -1; + goto out; + } - get_gfdb_methods (&gfdb_methods); + get_gfdb_methods(&gfdb_methods); - ret = 0; + ret = 0; out: - if (ret && libhandle) - dlclose (libhandle); + if (ret && libhandle) + dlclose(libhandle); - return ret; + return ret; } static tier_mode_t -tier_validate_mode (char *mode) +tier_validate_mode(char *mode) { - int ret = -1; + int ret = -1; - if (strcmp (mode, "test") == 0) { - ret = TIER_MODE_TEST; - } else { - ret = TIER_MODE_WM; - } + if (strcmp(mode, "test") == 0) { + ret = TIER_MODE_TEST; + } else { + ret = TIER_MODE_WM; + } - return ret; + return ret; } static gf_boolean_t -tier_validate_compact_mode (char *mode) +tier_validate_compact_mode(char *mode) { - gf_boolean_t ret = _gf_false; + gf_boolean_t ret = _gf_false; - gf_msg ("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "tier_validate_compact_mode: mode = %s", mode); + gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "tier_validate_compact_mode: mode = %s", mode); - if (!strcmp (mode, "on")) { - ret = _gf_true; - } else { - ret = _gf_false; - } + if (!strcmp(mode, "on")) { + ret = _gf_true; + } else { + ret = _gf_false; + } - gf_msg ("tier", GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_STATUS, - "tier_validate_compact_mode: ret = %i", ret); + gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, + "tier_validate_compact_mode: ret = %i", ret); - return ret; + return ret; } int -tier_init_methods (xlator_t *this) +tier_init_methods(xlator_t *this) { - int ret = -1; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; + int ret = -1; + dht_conf_t *conf = NULL; + dht_methods_t *methods = NULL; - GF_VALIDATE_OR_GOTO ("tier", this, err); + GF_VALIDATE_OR_GOTO("tier", this, err); - conf = this->private; + conf = this->private; - methods = &(conf->methods); + methods = &(conf->methods); - methods->migration_get_dst_subvol = tier_migration_get_dst; - methods->migration_other = tier_start; - methods->migration_needed = tier_migration_needed; - methods->layout_search = tier_search; + methods->migration_get_dst_subvol = tier_migration_get_dst; + methods->migration_other = tier_start; + methods->migration_needed = tier_migration_needed; + methods->layout_search = tier_search; - ret = 0; + ret = 0; err: - return ret; + return ret; } - static void -tier_save_vol_name (xlator_t *this) +tier_save_vol_name(xlator_t *this) { - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - char *suffix = NULL; - int name_len = 0; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + char *suffix = NULL; + int name_len = 0; + conf = this->private; + defrag = 
conf->defrag; - conf = this->private; - defrag = conf->defrag; - - suffix = strstr (this->name, "-tier-dht"); + suffix = strstr(this->name, "-tier-dht"); - if (suffix) - name_len = suffix - this->name; - else - name_len = strlen (this->name); + if (suffix) + name_len = suffix - this->name; + else + name_len = strlen(this->name); - if (name_len > GD_VOLUME_NAME_MAX) - name_len = GD_VOLUME_NAME_MAX; + if (name_len > GD_VOLUME_NAME_MAX) + name_len = GD_VOLUME_NAME_MAX; - strncpy (defrag->tier_conf.volname, this->name, name_len); - defrag->tier_conf.volname[name_len] = 0; + strncpy(defrag->tier_conf.volname, this->name, name_len); + defrag->tier_conf.volname[name_len] = 0; } int -tier_init (xlator_t *this) +tier_init(xlator_t *this) { - int ret = -1; - int freq = 0; - int maxsize = 0; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - char *voldir = NULL; - char *mode = NULL; - char *paused = NULL; - tier_mode_t tier_mode = DEFAULT_TIER_MODE; - gf_boolean_t compact_mode = _gf_false; - - ret = dht_init (this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "tier_init failed"); - goto out; - } - - conf = this->private; - - ret = tier_init_methods (this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "tier_init_methods failed"); - goto out; - } - - if (conf->subvolume_cnt != 2) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Invalid number of subvolumes %d", conf->subvolume_cnt); - goto out; - } - - /* if instatiated from client side initialization is complete. */ - if (!conf->defrag) { - ret = 0; - goto out; - } - - /* if instatiated from server side, load db libraries */ - ret = tier_load_externals (this); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "Could not load externals. Aborting"); - goto out; - } - - defrag = conf->defrag; + int ret = -1; + int freq = 0; + int maxsize = 0; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + char *voldir = NULL; + char *mode = NULL; + char *paused = NULL; + tier_mode_t tier_mode = DEFAULT_TIER_MODE; + gf_boolean_t compact_mode = _gf_false; + + ret = dht_init(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "tier_init failed"); + goto out; + } + + conf = this->private; + + ret = tier_init_methods(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "tier_init_methods failed"); + goto out; + } + + if (conf->subvolume_cnt != 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Invalid number of subvolumes %d", conf->subvolume_cnt); + goto out; + } + + /* if instatiated from client side initialization is complete. */ + if (!conf->defrag) { + ret = 0; + goto out; + } - defrag->tier_conf.last_demote_qfile_index = 0; - defrag->tier_conf.last_promote_qfile_index = 0; + /* if instatiated from server side, load db libraries */ + ret = tier_load_externals(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Could not load externals. 
Aborting"); + goto out; + } - defrag->tier_conf.is_tier = 1; - defrag->this = this; + defrag = conf->defrag; - ret = dict_get_int32 (this->options, - "tier-max-promote-file-size", &maxsize); - if (ret) { - maxsize = 0; - } + defrag->tier_conf.last_demote_qfile_index = 0; + defrag->tier_conf.last_promote_qfile_index = 0; - defrag->tier_conf.tier_max_promote_size = maxsize; + defrag->tier_conf.is_tier = 1; + defrag->this = this; - ret = dict_get_int32 (this->options, - "tier-promote-frequency", &freq); - if (ret) { - freq = DEFAULT_PROMOTE_FREQ_SEC; - } + ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize); + if (ret) { + maxsize = 0; + } - defrag->tier_conf.tier_promote_frequency = freq; + defrag->tier_conf.tier_max_promote_size = maxsize; - ret = dict_get_int32 (this->options, - "tier-demote-frequency", &freq); - if (ret) { - freq = DEFAULT_DEMOTE_FREQ_SEC; - } + ret = dict_get_int32(this->options, "tier-promote-frequency", &freq); + if (ret) { + freq = DEFAULT_PROMOTE_FREQ_SEC; + } - defrag->tier_conf.tier_demote_frequency = freq; + defrag->tier_conf.tier_promote_frequency = freq; - ret = dict_get_int32 (this->options, - "tier-hot-compact-frequency", &freq); - if (ret) { - freq = DEFAULT_HOT_COMPACT_FREQ_SEC; - } + ret = dict_get_int32(this->options, "tier-demote-frequency", &freq); + if (ret) { + freq = DEFAULT_DEMOTE_FREQ_SEC; + } - defrag->tier_conf.tier_compact_hot_frequency = freq; + defrag->tier_conf.tier_demote_frequency = freq; - ret = dict_get_int32 (this->options, - "tier-cold-compact-frequency", &freq); - if (ret) { - freq = DEFAULT_COLD_COMPACT_FREQ_SEC; - } + ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq); + if (ret) { + freq = DEFAULT_HOT_COMPACT_FREQ_SEC; + } - defrag->tier_conf.tier_compact_cold_frequency = freq; + defrag->tier_conf.tier_compact_hot_frequency = freq; - ret = dict_get_int32 (this->options, - "watermark-hi", &freq); - if (ret) { - freq = DEFAULT_WM_HI; - } + ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq); + if (ret) { + freq = DEFAULT_COLD_COMPACT_FREQ_SEC; + } - defrag->tier_conf.watermark_hi = freq; + defrag->tier_conf.tier_compact_cold_frequency = freq; - ret = dict_get_int32 (this->options, - "watermark-low", &freq); - if (ret) { - freq = DEFAULT_WM_LOW; - } + ret = dict_get_int32(this->options, "watermark-hi", &freq); + if (ret) { + freq = DEFAULT_WM_HI; + } - defrag->tier_conf.watermark_low = freq; + defrag->tier_conf.watermark_hi = freq; - ret = dict_get_int32 (this->options, - "write-freq-threshold", &freq); - if (ret) { - freq = DEFAULT_WRITE_FREQ_SEC; - } + ret = dict_get_int32(this->options, "watermark-low", &freq); + if (ret) { + freq = DEFAULT_WM_LOW; + } - defrag->write_freq_threshold = freq; + defrag->tier_conf.watermark_low = freq; - ret = dict_get_int32 (this->options, - "read-freq-threshold", &freq); - if (ret) { - freq = DEFAULT_READ_FREQ_SEC; - } + ret = dict_get_int32(this->options, "write-freq-threshold", &freq); + if (ret) { + freq = DEFAULT_WRITE_FREQ_SEC; + } - defrag->read_freq_threshold = freq; + defrag->write_freq_threshold = freq; - ret = dict_get_int32 (this->options, - "tier-max-mb", &freq); - if (ret) { - freq = DEFAULT_TIER_MAX_MIGRATE_MB; - } + ret = dict_get_int32(this->options, "read-freq-threshold", &freq); + if (ret) { + freq = DEFAULT_READ_FREQ_SEC; + } - defrag->tier_conf.max_migrate_bytes = (uint64_t) freq * 1024 * 1024; + defrag->read_freq_threshold = freq; - ret = dict_get_int32 (this->options, - "tier-max-files", &freq); - if (ret) { - 
freq = DEFAULT_TIER_MAX_MIGRATE_FILES; - } + ret = dict_get_int32(this->options, "tier-max-mb", &freq); + if (ret) { + freq = DEFAULT_TIER_MAX_MIGRATE_MB; + } - defrag->tier_conf.max_migrate_files = freq; + defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024; + ret = dict_get_int32(this->options, "tier-max-files", &freq); + if (ret) { + freq = DEFAULT_TIER_MAX_MIGRATE_FILES; + } - ret = dict_get_int32 (this->options, - "tier-query-limit", - &(defrag->tier_conf.query_limit)); - if (ret) { - defrag->tier_conf.query_limit = - DEFAULT_TIER_QUERY_LIMIT; - } + defrag->tier_conf.max_migrate_files = freq; - ret = dict_get_str (this->options, - "tier-compact", &mode); + ret = dict_get_int32(this->options, "tier-query-limit", + &(defrag->tier_conf.query_limit)); + if (ret) { + defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT; + } - if (ret) { - defrag->tier_conf.compact_active = DEFAULT_COMP_MODE; - } else { - compact_mode = tier_validate_compact_mode (mode); - /* If compaction is now active, we need to inform the bricks on - the hot and cold tier of this. See dht-common.h for more. */ - defrag->tier_conf.compact_active = compact_mode; - if (compact_mode) { - defrag->tier_conf.compact_mode_switched_hot = _gf_true; - defrag->tier_conf.compact_mode_switched_cold = _gf_true; - } - } + ret = dict_get_str(this->options, "tier-compact", &mode); - ret = dict_get_str (this->options, - "tier-mode", &mode); - if (ret) { - defrag->tier_conf.mode = DEFAULT_TIER_MODE; - } else { - tier_mode = tier_validate_mode (mode); - defrag->tier_conf.mode = tier_mode; + if (ret) { + defrag->tier_conf.compact_active = DEFAULT_COMP_MODE; + } else { + compact_mode = tier_validate_compact_mode(mode); + /* If compaction is now active, we need to inform the bricks on + the hot and cold tier of this. See dht-common.h for more. 
*/ + defrag->tier_conf.compact_active = compact_mode; + if (compact_mode) { + defrag->tier_conf.compact_mode_switched_hot = _gf_true; + defrag->tier_conf.compact_mode_switched_cold = _gf_true; } + } - pthread_mutex_init (&defrag->tier_conf.pause_mutex, 0); + ret = dict_get_str(this->options, "tier-mode", &mode); + if (ret) { + defrag->tier_conf.mode = DEFAULT_TIER_MODE; + } else { + tier_mode = tier_validate_mode(mode); + defrag->tier_conf.mode = tier_mode; + } - gf_defrag_set_pause_state (&defrag->tier_conf, TIER_RUNNING); + pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0); - ret = dict_get_str (this->options, - "tier-pause", &paused); + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - if (paused && strcmp (paused, "on") == 0) - gf_defrag_set_pause_state (&defrag->tier_conf, - TIER_REQUEST_PAUSE); + ret = dict_get_str(this->options, "tier-pause", &paused); - ret = gf_asprintf(&voldir, "%s/%s", - DEFAULT_VAR_RUN_DIRECTORY, - this->name); - if (ret < 0) - goto out; + if (paused && strcmp(paused, "on") == 0) + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); - ret = mkdir_p(voldir, 0777, _gf_true); - if (ret == -1 && errno != EEXIST) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "tier_init failed"); + ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name); + if (ret < 0) + goto out; - GF_FREE(voldir); - goto out; - } + ret = mkdir_p(voldir, 0777, _gf_true); + if (ret == -1 && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "tier_init failed"); GF_FREE(voldir); + goto out; + } - ret = gf_asprintf (&promotion_qfile, "%s/%s/promote", - DEFAULT_VAR_RUN_DIRECTORY, - this->name); - if (ret < 0) - goto out; + GF_FREE(voldir); - ret = gf_asprintf (&demotion_qfile, "%s/%s/demote", - DEFAULT_VAR_RUN_DIRECTORY, - this->name); - if (ret < 0) { - GF_FREE (promotion_qfile); - goto out; - } + ret = gf_asprintf(&promotion_qfile, "%s/%s/promote", + DEFAULT_VAR_RUN_DIRECTORY, this->name); + if (ret < 0) + goto out; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "Promote/demote frequency %d/%d " - "Write/Read freq thresholds %d/%d", - defrag->tier_conf.tier_promote_frequency, - defrag->tier_conf.tier_demote_frequency, - defrag->write_freq_threshold, - defrag->read_freq_threshold); + ret = gf_asprintf(&demotion_qfile, "%s/%s/demote", + DEFAULT_VAR_RUN_DIRECTORY, this->name); + if (ret < 0) { + GF_FREE(promotion_qfile); + goto out; + } - tier_save_vol_name (this); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Promote/demote frequency %d/%d " + "Write/Read freq thresholds %d/%d", + defrag->tier_conf.tier_promote_frequency, + defrag->tier_conf.tier_demote_frequency, + defrag->write_freq_threshold, defrag->read_freq_threshold); - ret = 0; + tier_save_vol_name(this); + + ret = 0; out: - return ret; + return ret; } int -tier_cli_pause_done (int op_ret, call_frame_t *sync_frame, void *data) +tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data) { - gf_msg ("tier", GF_LOG_INFO, 0, - DHT_MSG_TIER_PAUSED, - "Migrate file paused with op_ret %d", op_ret); + gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, + "Migrate file paused with op_ret %d", op_ret); - return op_ret; + return op_ret; } int -tier_cli_pause (void *data) +tier_cli_pause(void *data) { - gf_defrag_info_t *defrag = NULL; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - int ret = -1; + gf_defrag_info_t *defrag = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + int ret = -1; 
- this = data; + this = data; - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, exit); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, exit); - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO (this->name, defrag, exit); + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO(this->name, defrag, exit); - gf_defrag_pause_tier (this, defrag); + gf_defrag_pause_tier(this, defrag); - ret = 0; + ret = 0; exit: - return ret; + return ret; } int -tier_reconfigure (xlator_t *this, dict_t *options) +tier_reconfigure(xlator_t *this, dict_t *options) { - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - char *mode = NULL; - int migrate_mb = 0; - gf_boolean_t req_pause = _gf_false; - int ret = 0; - call_frame_t *frame = NULL; - gf_boolean_t last_compact_setting = _gf_false; - - conf = this->private; - - if (conf->defrag) { - defrag = conf->defrag; - GF_OPTION_RECONF ("tier-max-promote-file-size", - defrag->tier_conf.tier_max_promote_size, - options, int32, out); - - GF_OPTION_RECONF ("tier-promote-frequency", - defrag->tier_conf.tier_promote_frequency, - options, int32, out); - - GF_OPTION_RECONF ("tier-demote-frequency", - defrag->tier_conf.tier_demote_frequency, - options, int32, out); - - GF_OPTION_RECONF ("write-freq-threshold", - defrag->write_freq_threshold, options, - int32, out); - - GF_OPTION_RECONF ("read-freq-threshold", - defrag->read_freq_threshold, options, - int32, out); - - GF_OPTION_RECONF ("watermark-hi", - defrag->tier_conf.watermark_hi, options, - int32, out); - - GF_OPTION_RECONF ("watermark-low", - defrag->tier_conf.watermark_low, options, - int32, out); - - last_compact_setting = defrag->tier_conf.compact_active; - - GF_OPTION_RECONF ("tier-compact", - defrag->tier_conf.compact_active, options, - bool, out); - - if (last_compact_setting != defrag->tier_conf.compact_active) { - defrag->tier_conf.compact_mode_switched_hot = _gf_true; - defrag->tier_conf.compact_mode_switched_cold = _gf_true; - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, - "compact mode switched"); - } + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + char *mode = NULL; + int migrate_mb = 0; + gf_boolean_t req_pause = _gf_false; + int ret = 0; + call_frame_t *frame = NULL; + gf_boolean_t last_compact_setting = _gf_false; + + conf = this->private; + + if (conf->defrag) { + defrag = conf->defrag; + GF_OPTION_RECONF("tier-max-promote-file-size", + defrag->tier_conf.tier_max_promote_size, options, + int32, out); - GF_OPTION_RECONF ("tier-hot-compact-frequency", - defrag->tier_conf.tier_compact_hot_frequency, - options, int32, out); - - GF_OPTION_RECONF ("tier-cold-compact-frequency", - defrag->tier_conf.tier_compact_cold_frequency, - options, int32, out); - - GF_OPTION_RECONF ("tier-mode", - mode, options, - str, out); - defrag->tier_conf.mode = tier_validate_mode (mode); - - GF_OPTION_RECONF ("tier-max-mb", - migrate_mb, options, - int32, out); - defrag->tier_conf.max_migrate_bytes = (uint64_t) migrate_mb * - 1024 * 1024; - - GF_OPTION_RECONF ("tier-max-files", - defrag->tier_conf.max_migrate_files, options, - int32, out); - - GF_OPTION_RECONF ("tier-query-limit", - defrag->tier_conf.query_limit, - options, int32, out); - - GF_OPTION_RECONF ("tier-pause", - req_pause, options, - bool, out); - - if (req_pause == _gf_true) { - - frame = create_frame (this, this->ctx->pool); - if (!frame) - goto out; - - frame->root->pid = GF_CLIENT_PID_DEFRAG; - - ret = synctask_new (this->ctx->env, tier_cli_pause, - tier_cli_pause_done, frame, this); - - if (ret) { - gf_msg 
(this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "pause tier failed on reconfigure"); - } - } else { - ret = gf_defrag_resume_tier (this, defrag); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LOG_TIER_ERROR, - "resume tier failed on reconfigure"); - } - } + GF_OPTION_RECONF("tier-promote-frequency", + defrag->tier_conf.tier_promote_frequency, options, + int32, out); + + GF_OPTION_RECONF("tier-demote-frequency", + defrag->tier_conf.tier_demote_frequency, options, + int32, out); + + GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold, + options, int32, out); + + GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold, + options, int32, out); + + GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi, + options, int32, out); + + GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low, + options, int32, out); + + last_compact_setting = defrag->tier_conf.compact_active; + + GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active, + options, bool, out); + if (last_compact_setting != defrag->tier_conf.compact_active) { + defrag->tier_conf.compact_mode_switched_hot = _gf_true; + defrag->tier_conf.compact_mode_switched_cold = _gf_true; + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "compact mode switched"); } + GF_OPTION_RECONF("tier-hot-compact-frequency", + defrag->tier_conf.tier_compact_hot_frequency, options, + int32, out); + + GF_OPTION_RECONF("tier-cold-compact-frequency", + defrag->tier_conf.tier_compact_cold_frequency, options, + int32, out); + + GF_OPTION_RECONF("tier-mode", mode, options, str, out); + defrag->tier_conf.mode = tier_validate_mode(mode); + + GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out); + defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 * + 1024; + + GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files, + options, int32, out); + + GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit, + options, int32, out); + + GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out); + + if (req_pause == _gf_true) { + frame = create_frame(this, this->ctx->pool); + if (!frame) + goto out; + + frame->root->pid = GF_CLIENT_PID_DEFRAG; + + ret = synctask_new(this->ctx->env, tier_cli_pause, + tier_cli_pause_done, frame, this); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "pause tier failed on reconfigure"); + } + } else { + ret = gf_defrag_resume_tier(this, defrag); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "resume tier failed on reconfigure"); + } + } + } + out: - return dht_reconfigure (this, options); + return dht_reconfigure(this, options); } void -tier_fini (xlator_t *this) +tier_fini(xlator_t *this) { - if (libhandle) - dlclose (libhandle); + if (libhandle) + dlclose(libhandle); - GF_FREE (demotion_qfile); - GF_FREE (promotion_qfile); + GF_FREE(demotion_qfile); + GF_FREE(promotion_qfile); - dht_fini(this); + dht_fini(this); } -class_methods_t class_methods = { - .init = tier_init, - .fini = tier_fini, - .reconfigure = tier_reconfigure, - .notify = dht_notify -}; +class_methods_t class_methods = {.init = tier_init, + .fini = tier_fini, + .reconfigure = tier_reconfigure, + .notify = dht_notify}; struct xlator_fops fops = { - .lookup = dht_lookup, - .create = tier_create, - .mknod = dht_mknod, - - .open = dht_open, - .statfs = tier_statfs, - .opendir = dht_opendir, - .readdir = tier_readdir, - .readdirp = tier_readdirp, - .fsyncdir = dht_fsyncdir, - 
.symlink = dht_symlink, - .unlink = tier_unlink, - .link = tier_link, - .mkdir = dht_mkdir, - .rmdir = dht_rmdir, - .rename = dht_rename, - .entrylk = dht_entrylk, - .fentrylk = dht_fentrylk, - - /* Inode read operations */ - .stat = dht_stat, - .fstat = dht_fstat, - .access = dht_access, - .readlink = dht_readlink, - .getxattr = dht_getxattr, - .fgetxattr = dht_fgetxattr, - .readv = dht_readv, - .flush = dht_flush, - .fsync = dht_fsync, - .inodelk = dht_inodelk, - .finodelk = dht_finodelk, - .lk = dht_lk, - - /* Inode write operations */ - .fremovexattr = dht_fremovexattr, - .removexattr = dht_removexattr, - .setxattr = dht_setxattr, - .fsetxattr = dht_fsetxattr, - .truncate = dht_truncate, - .ftruncate = dht_ftruncate, - .writev = dht_writev, - .xattrop = dht_xattrop, - .fxattrop = dht_fxattrop, - .setattr = dht_setattr, - .fsetattr = dht_fsetattr, - .fallocate = dht_fallocate, - .discard = dht_discard, - .zerofill = dht_zerofill, + .lookup = dht_lookup, + .create = tier_create, + .mknod = dht_mknod, + + .open = dht_open, + .statfs = tier_statfs, + .opendir = dht_opendir, + .readdir = tier_readdir, + .readdirp = tier_readdirp, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = tier_unlink, + .link = tier_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, + + /* Inode read operations */ + .stat = dht_stat, + .fstat = dht_fstat, + .access = dht_access, + .readlink = dht_readlink, + .getxattr = dht_getxattr, + .fgetxattr = dht_fgetxattr, + .readv = dht_readv, + .flush = dht_flush, + .fsync = dht_fsync, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .lk = dht_lk, + + /* Inode write operations */ + .fremovexattr = dht_fremovexattr, + .removexattr = dht_removexattr, + .setxattr = dht_setxattr, + .fsetxattr = dht_fsetxattr, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .writev = dht_writev, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, + .setattr = dht_setattr, + .fsetattr = dht_fsetattr, + .fallocate = dht_fallocate, + .discard = dht_discard, + .zerofill = dht_zerofill, }; -struct xlator_cbks cbks = { - .release = dht_release, - .forget = dht_forget -}; +struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget}; diff --git a/xlators/cluster/dht/src/unittest/dht_layout_mock.c b/xlators/cluster/dht/src/unittest/dht_layout_mock.c index 6544f4208f5..49bf18b9fe6 100644 --- a/xlators/cluster/dht/src/unittest/dht_layout_mock.c +++ b/xlators/cluster/dht/src/unittest/dht_layout_mock.c @@ -13,60 +13,61 @@ #include "byte-order.h" int -dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p) +dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p) { return 0; } int -dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout) +dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this, dht_layout_t **layout) { return 0; } int -dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this, - dht_layout_t *layout_int) +dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this, + dht_layout_t *layout_int) { return 0; } int -dict_get_ptr (dict_t *this, char *key, void **ptr) +dict_get_ptr(dict_t *this, char *key, void **ptr) { return 0; } int -dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len) +dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len) { return 0; } -int _gf_log (const char *domain, const char *file, - const char *function, int32_t line, gf_loglevel_t level, - const char 
*fmt, ...) +int +_gf_log(const char *domain, const char *file, const char *function, + int32_t line, gf_loglevel_t level, const char *fmt, ...) { return 0; } -int _gf_log_callingfn (const char *domain, const char *file, - const char *function, int32_t line, gf_loglevel_t level, - const char *fmt, ...) +int +_gf_log_callingfn(const char *domain, const char *file, const char *function, + int32_t line, gf_loglevel_t level, const char *fmt, ...) { return 0; } -void gf_uuid_unparse(const uuid_t uu, char *out) +void +gf_uuid_unparse(const uuid_t uu, char *out) { // could call a will-return function here // to place the correct data in *out } int -_gf_msg (const char *domain, const char *file, const char *function, - int32_t line, gf_loglevel_t level, int errnum, int trace, - uint64_t msgid, const char *fmt, ...) +_gf_msg(const char *domain, const char *file, const char *function, + int32_t line, gf_loglevel_t level, int errnum, int trace, + uint64_t msgid, const char *fmt, ...) { return 0; } diff --git a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c index fb6445ce1f3..72890070835 100644 --- a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c +++ b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c @@ -34,16 +34,16 @@ helper_xlator_init(uint32_t num_types) xl = test_calloc(1, sizeof(xlator_t)); assert_non_null(xl); xl->mem_acct->num_types = num_types; - xl->mem_acct = test_calloc (sizeof(struct mem_acct) - + sizeof(struct mem_acct_rec) + num_types); + xl->mem_acct = test_calloc(sizeof(struct mem_acct) + + sizeof(struct mem_acct_rec) + num_types); assert_non_null(xl->mem_acct); xl->ctx = test_calloc(1, sizeof(glusterfs_ctx_t)); assert_non_null(xl->ctx); for (i = 0; i < num_types; i++) { - ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock)); - assert_false(ret); + ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock)); + assert_false(ret); } ENSURE(num_types == xl->mem_acct.num_types); @@ -58,8 +58,8 @@ helper_xlator_destroy(xlator_t *xl) int i, ret; for (i = 0; i < xl->mem_acct.num_types; i++) { - ret = LOCK_DESTROY(&(xl->mem_acct.rec[i].lock)); - assert_int_equal(ret, 0); + ret = LOCK_DESTROY(&(xl->mem_acct.rec[i].lock)); + assert_int_equal(ret, 0); } free(xl->mem_acct.rec); @@ -76,7 +76,7 @@ test_dht_layout_new(void **state) { xlator_t *xl; dht_layout_t *layout; - dht_conf_t *conf; + dht_conf_t *conf; int cnt; expect_assert_failure(dht_layout_new(NULL, 0)); @@ -90,7 +90,7 @@ test_dht_layout_new(void **state) assert_non_null(layout); assert_int_equal(layout->type, DHT_HASH_TYPE_DM); assert_int_equal(layout->cnt, cnt); - assert_int_equal(GF_ATOMIC_GET (layout->ref), 1); + assert_int_equal(GF_ATOMIC_GET(layout->ref), 1); assert_int_equal(layout->gen, 0); assert_int_equal(layout->spread_cnt, 0); free(layout); @@ -107,7 +107,7 @@ test_dht_layout_new(void **state) assert_non_null(layout); assert_int_equal(layout->type, DHT_HASH_TYPE_DM); assert_int_equal(layout->cnt, cnt); - assert_int_equal(GF_ATOMIC_GET (layout->ref), 1); + assert_int_equal(GF_ATOMIC_GET(layout->ref), 1); assert_int_equal(layout->gen, conf->gen); assert_int_equal(layout->spread_cnt, conf->dir_spread_cnt); free(layout); @@ -116,7 +116,9 @@ test_dht_layout_new(void **state) helper_xlator_destroy(xl); } -int main(void) { +int +main(void) +{ const struct CMUnitTest xlator_dht_layout_tests[] = { unit_test(test_dht_layout_new), }; diff --git a/xlators/cluster/ec/src/ec-code-avx.c b/xlators/cluster/ec/src/ec-code-avx.c index 92bd3e83c5e..70afaa00f54 100644 --- 
a/xlators/cluster/ec/src/ec-code-avx.c +++ b/xlators/cluster/ec/src/ec-code-avx.c @@ -34,10 +34,9 @@ ec_code_avx_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, uint32_t bit) { if (builder->linear) { - ec_code_intel_op_mov_m2avx(builder, REG_SI, REG_DX, 1, - idx * builder->width * builder->bits + - bit * builder->width, - dst); + ec_code_intel_op_mov_m2avx( + builder, REG_SI, REG_DX, 1, + idx * builder->width * builder->bits + bit * builder->width, dst); } else { if (builder->base != idx) { ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8, @@ -81,10 +80,9 @@ ec_code_avx_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, uint32_t bit) { if (builder->linear) { - ec_code_intel_op_xor_m2avx(builder, REG_SI, REG_DX, 1, - idx * builder->width * builder->bits + - bit * builder->width, - dst); + ec_code_intel_op_xor_m2avx( + builder, REG_SI, REG_DX, 1, + idx * builder->width * builder->bits + bit * builder->width, dst); } else { if (builder->base != idx) { ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8, @@ -96,21 +94,16 @@ ec_code_avx_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, } } -static char *ec_code_avx_needed_flags[] = { - "avx2", - NULL -}; +static char *ec_code_avx_needed_flags[] = {"avx2", NULL}; -ec_code_gen_t ec_code_gen_avx = { - .name = "avx", - .flags = ec_code_avx_needed_flags, - .width = 32, - .prolog = ec_code_avx_prolog, - .epilog = ec_code_avx_epilog, - .load = ec_code_avx_load, - .store = ec_code_avx_store, - .copy = ec_code_avx_copy, - .xor2 = ec_code_avx_xor2, - .xor3 = ec_code_avx_xor3, - .xorm = ec_code_avx_xorm -}; +ec_code_gen_t ec_code_gen_avx = {.name = "avx", + .flags = ec_code_avx_needed_flags, + .width = 32, + .prolog = ec_code_avx_prolog, + .epilog = ec_code_avx_epilog, + .load = ec_code_avx_load, + .store = ec_code_avx_store, + .copy = ec_code_avx_copy, + .xor2 = ec_code_avx_xor2, + .xor3 = ec_code_avx_xor3, + .xorm = ec_code_avx_xorm}; diff --git a/xlators/cluster/ec/src/ec-code-c.c b/xlators/cluster/ec/src/ec-code-c.c index 7387f3ea435..acdc665c2cf 100644 --- a/xlators/cluster/ec/src/ec-code-c.c +++ b/xlators/cluster/ec/src/ec-code-c.c @@ -16,12 +16,14 @@ #define WIDTH (EC_METHOD_WORD_SIZE / sizeof(uint64_t)) -static void gf8_muladd_00(void *out, void *in) +static void +gf8_muladd_00(void *out, void *in) { memcpy(out, in, EC_METHOD_WORD_SIZE * 8); } -static void gf8_muladd_01(void *out, void *in) +static void +gf8_muladd_01(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -42,7 +44,8 @@ static void gf8_muladd_01(void *out, void *in) } } -static void gf8_muladd_02(void *out, void *in) +static void +gf8_muladd_02(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -83,7 +86,8 @@ static void gf8_muladd_02(void *out, void *in) } } -static void gf8_muladd_03(void *out, void *in) +static void +gf8_muladd_03(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -126,7 +130,8 @@ static void gf8_muladd_03(void *out, void *in) } } -static void gf8_muladd_04(void *out, void *in) +static void +gf8_muladd_04(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -169,7 +174,8 @@ static void gf8_muladd_04(void *out, void *in) } } -static void gf8_muladd_05(void *out, void *in) +static void +gf8_muladd_05(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -210,7 +216,8 @@ static void gf8_muladd_05(void *out, void *in) } } -static void gf8_muladd_06(void *out, void *in) +static void 
+gf8_muladd_06(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -253,7 +260,8 @@ static void gf8_muladd_06(void *out, void *in) } } -static void gf8_muladd_07(void *out, void *in) +static void +gf8_muladd_07(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -299,7 +307,8 @@ static void gf8_muladd_07(void *out, void *in) } } -static void gf8_muladd_08(void *out, void *in) +static void +gf8_muladd_08(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -340,7 +349,8 @@ static void gf8_muladd_08(void *out, void *in) } } -static void gf8_muladd_09(void *out, void *in) +static void +gf8_muladd_09(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -383,7 +393,8 @@ static void gf8_muladd_09(void *out, void *in) } } -static void gf8_muladd_0A(void *out, void *in) +static void +gf8_muladd_0A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -424,7 +435,8 @@ static void gf8_muladd_0A(void *out, void *in) } } -static void gf8_muladd_0B(void *out, void *in) +static void +gf8_muladd_0B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -469,7 +481,8 @@ static void gf8_muladd_0B(void *out, void *in) } } -static void gf8_muladd_0C(void *out, void *in) +static void +gf8_muladd_0C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -513,7 +526,8 @@ static void gf8_muladd_0C(void *out, void *in) } } -static void gf8_muladd_0D(void *out, void *in) +static void +gf8_muladd_0D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -558,7 +572,8 @@ static void gf8_muladd_0D(void *out, void *in) } } -static void gf8_muladd_0E(void *out, void *in) +static void +gf8_muladd_0E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -604,7 +619,8 @@ static void gf8_muladd_0E(void *out, void *in) } } -static void gf8_muladd_0F(void *out, void *in) +static void +gf8_muladd_0F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -650,7 +666,8 @@ static void gf8_muladd_0F(void *out, void *in) } } -static void gf8_muladd_10(void *out, void *in) +static void +gf8_muladd_10(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -694,7 +711,8 @@ static void gf8_muladd_10(void *out, void *in) } } -static void gf8_muladd_11(void *out, void *in) +static void +gf8_muladd_11(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -735,7 +753,8 @@ static void gf8_muladd_11(void *out, void *in) } } -static void gf8_muladd_12(void *out, void *in) +static void +gf8_muladd_12(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -779,7 +798,8 @@ static void gf8_muladd_12(void *out, void *in) } } -static void gf8_muladd_13(void *out, void *in) +static void +gf8_muladd_13(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -823,7 +843,8 @@ static void gf8_muladd_13(void *out, void *in) } } -static void gf8_muladd_14(void *out, void *in) +static void +gf8_muladd_14(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -867,7 +888,8 @@ static void gf8_muladd_14(void *out, void *in) } } -static void gf8_muladd_15(void *out, void *in) +static void +gf8_muladd_15(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -910,7 +932,8 @@ static void gf8_muladd_15(void *out, void *in) } } -static void gf8_muladd_16(void *out, void *in) +static void 
+gf8_muladd_16(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -956,7 +979,8 @@ static void gf8_muladd_16(void *out, void *in) } } -static void gf8_muladd_17(void *out, void *in) +static void +gf8_muladd_17(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1002,7 +1026,8 @@ static void gf8_muladd_17(void *out, void *in) } } -static void gf8_muladd_18(void *out, void *in) +static void +gf8_muladd_18(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1046,7 +1071,8 @@ static void gf8_muladd_18(void *out, void *in) } } -static void gf8_muladd_19(void *out, void *in) +static void +gf8_muladd_19(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1090,7 +1116,8 @@ static void gf8_muladd_19(void *out, void *in) } } -static void gf8_muladd_1A(void *out, void *in) +static void +gf8_muladd_1A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1136,7 +1163,8 @@ static void gf8_muladd_1A(void *out, void *in) } } -static void gf8_muladd_1B(void *out, void *in) +static void +gf8_muladd_1B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1183,7 +1211,8 @@ static void gf8_muladd_1B(void *out, void *in) } } -static void gf8_muladd_1C(void *out, void *in) +static void +gf8_muladd_1C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1230,7 +1259,8 @@ static void gf8_muladd_1C(void *out, void *in) } } -static void gf8_muladd_1D(void *out, void *in) +static void +gf8_muladd_1D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1277,7 +1307,8 @@ static void gf8_muladd_1D(void *out, void *in) } } -static void gf8_muladd_1E(void *out, void *in) +static void +gf8_muladd_1E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1323,7 +1354,8 @@ static void gf8_muladd_1E(void *out, void *in) } } -static void gf8_muladd_1F(void *out, void *in) +static void +gf8_muladd_1F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1367,7 +1399,8 @@ static void gf8_muladd_1F(void *out, void *in) } } -static void gf8_muladd_20(void *out, void *in) +static void +gf8_muladd_20(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1411,7 +1444,8 @@ static void gf8_muladd_20(void *out, void *in) } } -static void gf8_muladd_21(void *out, void *in) +static void +gf8_muladd_21(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1454,7 +1488,8 @@ static void gf8_muladd_21(void *out, void *in) } } -static void gf8_muladd_22(void *out, void *in) +static void +gf8_muladd_22(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1495,7 +1530,8 @@ static void gf8_muladd_22(void *out, void *in) } } -static void gf8_muladd_23(void *out, void *in) +static void +gf8_muladd_23(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1536,7 +1572,8 @@ static void gf8_muladd_23(void *out, void *in) } } -static void gf8_muladd_24(void *out, void *in) +static void +gf8_muladd_24(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1580,7 +1617,8 @@ static void gf8_muladd_24(void *out, void *in) } } -static void gf8_muladd_25(void *out, void *in) +static void +gf8_muladd_25(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1623,7 +1661,8 @@ static void gf8_muladd_25(void *out, void *in) } } -static void gf8_muladd_26(void *out, void *in) 
+static void +gf8_muladd_26(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1668,7 +1707,8 @@ static void gf8_muladd_26(void *out, void *in) } } -static void gf8_muladd_27(void *out, void *in) +static void +gf8_muladd_27(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1709,7 +1749,8 @@ static void gf8_muladd_27(void *out, void *in) } } -static void gf8_muladd_28(void *out, void *in) +static void +gf8_muladd_28(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1754,7 +1795,8 @@ static void gf8_muladd_28(void *out, void *in) } } -static void gf8_muladd_29(void *out, void *in) +static void +gf8_muladd_29(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1799,7 +1841,8 @@ static void gf8_muladd_29(void *out, void *in) } } -static void gf8_muladd_2A(void *out, void *in) +static void +gf8_muladd_2A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1843,7 +1886,8 @@ static void gf8_muladd_2A(void *out, void *in) } } -static void gf8_muladd_2B(void *out, void *in) +static void +gf8_muladd_2B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1887,7 +1931,8 @@ static void gf8_muladd_2B(void *out, void *in) } } -static void gf8_muladd_2C(void *out, void *in) +static void +gf8_muladd_2C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1933,7 +1978,8 @@ static void gf8_muladd_2C(void *out, void *in) } } -static void gf8_muladd_2D(void *out, void *in) +static void +gf8_muladd_2D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -1979,7 +2025,8 @@ static void gf8_muladd_2D(void *out, void *in) } } -static void gf8_muladd_2E(void *out, void *in) +static void +gf8_muladd_2E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2024,7 +2071,8 @@ static void gf8_muladd_2E(void *out, void *in) } } -static void gf8_muladd_2F(void *out, void *in) +static void +gf8_muladd_2F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2069,7 +2117,8 @@ static void gf8_muladd_2F(void *out, void *in) } } -static void gf8_muladd_30(void *out, void *in) +static void +gf8_muladd_30(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2113,7 +2162,8 @@ static void gf8_muladd_30(void *out, void *in) } } -static void gf8_muladd_31(void *out, void *in) +static void +gf8_muladd_31(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2158,7 +2208,8 @@ static void gf8_muladd_31(void *out, void *in) } } -static void gf8_muladd_32(void *out, void *in) +static void +gf8_muladd_32(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2202,7 +2253,8 @@ static void gf8_muladd_32(void *out, void *in) } } -static void gf8_muladd_33(void *out, void *in) +static void +gf8_muladd_33(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2249,7 +2301,8 @@ static void gf8_muladd_33(void *out, void *in) } } -static void gf8_muladd_34(void *out, void *in) +static void +gf8_muladd_34(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2296,7 +2349,8 @@ static void gf8_muladd_34(void *out, void *in) } } -static void gf8_muladd_35(void *out, void *in) +static void +gf8_muladd_35(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2341,7 +2395,8 @@ static void gf8_muladd_35(void *out, void *in) } } -static void gf8_muladd_36(void 
*out, void *in) +static void +gf8_muladd_36(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2385,7 +2440,8 @@ static void gf8_muladd_36(void *out, void *in) } } -static void gf8_muladd_37(void *out, void *in) +static void +gf8_muladd_37(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2431,7 +2487,8 @@ static void gf8_muladd_37(void *out, void *in) } } -static void gf8_muladd_38(void *out, void *in) +static void +gf8_muladd_38(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2476,7 +2533,8 @@ static void gf8_muladd_38(void *out, void *in) } } -static void gf8_muladd_39(void *out, void *in) +static void +gf8_muladd_39(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2521,7 +2579,8 @@ static void gf8_muladd_39(void *out, void *in) } } -static void gf8_muladd_3A(void *out, void *in) +static void +gf8_muladd_3A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2569,7 +2628,8 @@ static void gf8_muladd_3A(void *out, void *in) } } -static void gf8_muladd_3B(void *out, void *in) +static void +gf8_muladd_3B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2614,7 +2674,8 @@ static void gf8_muladd_3B(void *out, void *in) } } -static void gf8_muladd_3C(void *out, void *in) +static void +gf8_muladd_3C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2659,7 +2720,8 @@ static void gf8_muladd_3C(void *out, void *in) } } -static void gf8_muladd_3D(void *out, void *in) +static void +gf8_muladd_3D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2704,7 +2766,8 @@ static void gf8_muladd_3D(void *out, void *in) } } -static void gf8_muladd_3E(void *out, void *in) +static void +gf8_muladd_3E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2748,7 +2811,8 @@ static void gf8_muladd_3E(void *out, void *in) } } -static void gf8_muladd_3F(void *out, void *in) +static void +gf8_muladd_3F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2793,7 +2857,8 @@ static void gf8_muladd_3F(void *out, void *in) } } -static void gf8_muladd_40(void *out, void *in) +static void +gf8_muladd_40(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2837,7 +2902,8 @@ static void gf8_muladd_40(void *out, void *in) } } -static void gf8_muladd_41(void *out, void *in) +static void +gf8_muladd_41(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2881,7 +2947,8 @@ static void gf8_muladd_41(void *out, void *in) } } -static void gf8_muladd_42(void *out, void *in) +static void +gf8_muladd_42(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2922,7 +2989,8 @@ static void gf8_muladd_42(void *out, void *in) } } -static void gf8_muladd_43(void *out, void *in) +static void +gf8_muladd_43(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -2963,7 +3031,8 @@ static void gf8_muladd_43(void *out, void *in) } } -static void gf8_muladd_44(void *out, void *in) +static void +gf8_muladd_44(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3006,7 +3075,8 @@ static void gf8_muladd_44(void *out, void *in) } } -static void gf8_muladd_45(void *out, void *in) +static void +gf8_muladd_45(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3047,7 +3117,8 @@ static void gf8_muladd_45(void *out, void *in) } } -static void 
gf8_muladd_46(void *out, void *in) +static void +gf8_muladd_46(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3088,7 +3159,8 @@ static void gf8_muladd_46(void *out, void *in) } } -static void gf8_muladd_47(void *out, void *in) +static void +gf8_muladd_47(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3131,7 +3203,8 @@ static void gf8_muladd_47(void *out, void *in) } } -static void gf8_muladd_48(void *out, void *in) +static void +gf8_muladd_48(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3175,7 +3248,8 @@ static void gf8_muladd_48(void *out, void *in) } } -static void gf8_muladd_49(void *out, void *in) +static void +gf8_muladd_49(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3219,7 +3293,8 @@ static void gf8_muladd_49(void *out, void *in) } } -static void gf8_muladd_4A(void *out, void *in) +static void +gf8_muladd_4A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3263,7 +3338,8 @@ static void gf8_muladd_4A(void *out, void *in) } } -static void gf8_muladd_4B(void *out, void *in) +static void +gf8_muladd_4B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3309,7 +3385,8 @@ static void gf8_muladd_4B(void *out, void *in) } } -static void gf8_muladd_4C(void *out, void *in) +static void +gf8_muladd_4C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3354,7 +3431,8 @@ static void gf8_muladd_4C(void *out, void *in) } } -static void gf8_muladd_4D(void *out, void *in) +static void +gf8_muladd_4D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3399,7 +3477,8 @@ static void gf8_muladd_4D(void *out, void *in) } } -static void gf8_muladd_4E(void *out, void *in) +static void +gf8_muladd_4E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3440,7 +3519,8 @@ static void gf8_muladd_4E(void *out, void *in) } } -static void gf8_muladd_4F(void *out, void *in) +static void +gf8_muladd_4F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3481,7 +3561,8 @@ static void gf8_muladd_4F(void *out, void *in) } } -static void gf8_muladd_50(void *out, void *in) +static void +gf8_muladd_50(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3526,7 +3607,8 @@ static void gf8_muladd_50(void *out, void *in) } } -static void gf8_muladd_51(void *out, void *in) +static void +gf8_muladd_51(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3567,7 +3649,8 @@ static void gf8_muladd_51(void *out, void *in) } } -static void gf8_muladd_52(void *out, void *in) +static void +gf8_muladd_52(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3613,7 +3696,8 @@ static void gf8_muladd_52(void *out, void *in) } } -static void gf8_muladd_53(void *out, void *in) +static void +gf8_muladd_53(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3654,7 +3738,8 @@ static void gf8_muladd_53(void *out, void *in) } } -static void gf8_muladd_54(void *out, void *in) +static void +gf8_muladd_54(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3700,7 +3785,8 @@ static void gf8_muladd_54(void *out, void *in) } } -static void gf8_muladd_55(void *out, void *in) +static void +gf8_muladd_55(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3745,7 +3831,8 @@ static void gf8_muladd_55(void *out, void *in) } } 
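Editorial note: the long run of gf8_muladd_NN hunks in this part of ec-gf8.c only re-wraps the function definitions; the functions themselves are generated GF(2^8) multiply-accumulate kernels operating on bit-sliced uint64_t blocks. The sketch below shows how the gf8_muladd[] dispatch table defined near the end of the file appears to be consumed. The assumed semantics (out = out * 0xNN + in, so gf8_muladd_00 degenerates into a plain copy) are inferred from ec_code_c_linear() later in this patch, and the helper name gf8_muladd_apply is mine, not upstream's.

/* Sketch only, not part of the upstream patch; assumes <stdint.h> and the
 * gf8_muladd[] table declared further down in this file. */
static inline void
gf8_muladd_apply(uint8_t coeff, void *out, void *in)
{
    /* Scale the accumulator by 'coeff' in GF(2^8) and fold in the next
     * input block; chaining one call per coefficient is how the C
     * backend evaluates an erasure-code linear combination. */
    gf8_muladd[coeff](out, in);
}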
-static void gf8_muladd_56(void *out, void *in) +static void +gf8_muladd_56(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3788,7 +3875,8 @@ static void gf8_muladd_56(void *out, void *in) } } -static void gf8_muladd_57(void *out, void *in) +static void +gf8_muladd_57(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3832,7 +3920,8 @@ static void gf8_muladd_57(void *out, void *in) } } -static void gf8_muladd_58(void *out, void *in) +static void +gf8_muladd_58(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3876,7 +3965,8 @@ static void gf8_muladd_58(void *out, void *in) } } -static void gf8_muladd_59(void *out, void *in) +static void +gf8_muladd_59(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3920,7 +4010,8 @@ static void gf8_muladd_59(void *out, void *in) } } -static void gf8_muladd_5A(void *out, void *in) +static void +gf8_muladd_5A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -3965,7 +4056,8 @@ static void gf8_muladd_5A(void *out, void *in) } } -static void gf8_muladd_5B(void *out, void *in) +static void +gf8_muladd_5B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4012,7 +4104,8 @@ static void gf8_muladd_5B(void *out, void *in) } } -static void gf8_muladd_5C(void *out, void *in) +static void +gf8_muladd_5C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4057,7 +4150,8 @@ static void gf8_muladd_5C(void *out, void *in) } } -static void gf8_muladd_5D(void *out, void *in) +static void +gf8_muladd_5D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4104,7 +4198,8 @@ static void gf8_muladd_5D(void *out, void *in) } } -static void gf8_muladd_5E(void *out, void *in) +static void +gf8_muladd_5E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4151,7 +4246,8 @@ static void gf8_muladd_5E(void *out, void *in) } } -static void gf8_muladd_5F(void *out, void *in) +static void +gf8_muladd_5F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4197,7 +4293,8 @@ static void gf8_muladd_5F(void *out, void *in) } } -static void gf8_muladd_60(void *out, void *in) +static void +gf8_muladd_60(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4241,7 +4338,8 @@ static void gf8_muladd_60(void *out, void *in) } } -static void gf8_muladd_61(void *out, void *in) +static void +gf8_muladd_61(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4285,7 +4383,8 @@ static void gf8_muladd_61(void *out, void *in) } } -static void gf8_muladd_62(void *out, void *in) +static void +gf8_muladd_62(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4331,7 +4430,8 @@ static void gf8_muladd_62(void *out, void *in) } } -static void gf8_muladd_63(void *out, void *in) +static void +gf8_muladd_63(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4378,7 +4478,8 @@ static void gf8_muladd_63(void *out, void *in) } } -static void gf8_muladd_64(void *out, void *in) +static void +gf8_muladd_64(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4422,7 +4523,8 @@ static void gf8_muladd_64(void *out, void *in) } } -static void gf8_muladd_65(void *out, void *in) +static void +gf8_muladd_65(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4468,7 +4570,8 @@ static void gf8_muladd_65(void *out, 
void *in) } } -static void gf8_muladd_66(void *out, void *in) +static void +gf8_muladd_66(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4515,7 +4618,8 @@ static void gf8_muladd_66(void *out, void *in) } } -static void gf8_muladd_67(void *out, void *in) +static void +gf8_muladd_67(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4561,7 +4665,8 @@ static void gf8_muladd_67(void *out, void *in) } } -static void gf8_muladd_68(void *out, void *in) +static void +gf8_muladd_68(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4607,7 +4712,8 @@ static void gf8_muladd_68(void *out, void *in) } } -static void gf8_muladd_69(void *out, void *in) +static void +gf8_muladd_69(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4650,7 +4756,8 @@ static void gf8_muladd_69(void *out, void *in) } } -static void gf8_muladd_6A(void *out, void *in) +static void +gf8_muladd_6A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4695,7 +4802,8 @@ static void gf8_muladd_6A(void *out, void *in) } } -static void gf8_muladd_6B(void *out, void *in) +static void +gf8_muladd_6B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4739,7 +4847,8 @@ static void gf8_muladd_6B(void *out, void *in) } } -static void gf8_muladd_6C(void *out, void *in) +static void +gf8_muladd_6C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4783,7 +4892,8 @@ static void gf8_muladd_6C(void *out, void *in) } } -static void gf8_muladd_6D(void *out, void *in) +static void +gf8_muladd_6D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4827,7 +4937,8 @@ static void gf8_muladd_6D(void *out, void *in) } } -static void gf8_muladd_6E(void *out, void *in) +static void +gf8_muladd_6E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4872,7 +4983,8 @@ static void gf8_muladd_6E(void *out, void *in) } } -static void gf8_muladd_6F(void *out, void *in) +static void +gf8_muladd_6F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4917,7 +5029,8 @@ static void gf8_muladd_6F(void *out, void *in) } } -static void gf8_muladd_70(void *out, void *in) +static void +gf8_muladd_70(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -4962,7 +5075,8 @@ static void gf8_muladd_70(void *out, void *in) } } -static void gf8_muladd_71(void *out, void *in) +static void +gf8_muladd_71(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5007,7 +5121,8 @@ static void gf8_muladd_71(void *out, void *in) } } -static void gf8_muladd_72(void *out, void *in) +static void +gf8_muladd_72(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5052,7 +5167,8 @@ static void gf8_muladd_72(void *out, void *in) } } -static void gf8_muladd_73(void *out, void *in) +static void +gf8_muladd_73(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5093,7 +5209,8 @@ static void gf8_muladd_73(void *out, void *in) } } -static void gf8_muladd_74(void *out, void *in) +static void +gf8_muladd_74(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5137,7 +5254,8 @@ static void gf8_muladd_74(void *out, void *in) } } -static void gf8_muladd_75(void *out, void *in) +static void +gf8_muladd_75(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5182,7 +5300,8 @@ static void 
gf8_muladd_75(void *out, void *in) } } -static void gf8_muladd_76(void *out, void *in) +static void +gf8_muladd_76(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5228,7 +5347,8 @@ static void gf8_muladd_76(void *out, void *in) } } -static void gf8_muladd_77(void *out, void *in) +static void +gf8_muladd_77(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5274,7 +5394,8 @@ static void gf8_muladd_77(void *out, void *in) } } -static void gf8_muladd_78(void *out, void *in) +static void +gf8_muladd_78(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5319,7 +5440,8 @@ static void gf8_muladd_78(void *out, void *in) } } -static void gf8_muladd_79(void *out, void *in) +static void +gf8_muladd_79(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5365,7 +5487,8 @@ static void gf8_muladd_79(void *out, void *in) } } -static void gf8_muladd_7A(void *out, void *in) +static void +gf8_muladd_7A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5409,7 +5532,8 @@ static void gf8_muladd_7A(void *out, void *in) } } -static void gf8_muladd_7B(void *out, void *in) +static void +gf8_muladd_7B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5454,7 +5578,8 @@ static void gf8_muladd_7B(void *out, void *in) } } -static void gf8_muladd_7C(void *out, void *in) +static void +gf8_muladd_7C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5498,7 +5623,8 @@ static void gf8_muladd_7C(void *out, void *in) } } -static void gf8_muladd_7D(void *out, void *in) +static void +gf8_muladd_7D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5544,7 +5670,8 @@ static void gf8_muladd_7D(void *out, void *in) } } -static void gf8_muladd_7E(void *out, void *in) +static void +gf8_muladd_7E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5589,7 +5716,8 @@ static void gf8_muladd_7E(void *out, void *in) } } -static void gf8_muladd_7F(void *out, void *in) +static void +gf8_muladd_7F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5635,7 +5763,8 @@ static void gf8_muladd_7F(void *out, void *in) } } -static void gf8_muladd_80(void *out, void *in) +static void +gf8_muladd_80(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5680,7 +5809,8 @@ static void gf8_muladd_80(void *out, void *in) } } -static void gf8_muladd_81(void *out, void *in) +static void +gf8_muladd_81(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5724,7 +5854,8 @@ static void gf8_muladd_81(void *out, void *in) } } -static void gf8_muladd_82(void *out, void *in) +static void +gf8_muladd_82(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5767,7 +5898,8 @@ static void gf8_muladd_82(void *out, void *in) } } -static void gf8_muladd_83(void *out, void *in) +static void +gf8_muladd_83(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5814,7 +5946,8 @@ static void gf8_muladd_83(void *out, void *in) } } -static void gf8_muladd_84(void *out, void *in) +static void +gf8_muladd_84(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5855,7 +5988,8 @@ static void gf8_muladd_84(void *out, void *in) } } -static void gf8_muladd_85(void *out, void *in) +static void +gf8_muladd_85(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5901,7 +6035,8 
@@ static void gf8_muladd_85(void *out, void *in) } } -static void gf8_muladd_86(void *out, void *in) +static void +gf8_muladd_86(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5942,7 +6077,8 @@ static void gf8_muladd_86(void *out, void *in) } } -static void gf8_muladd_87(void *out, void *in) +static void +gf8_muladd_87(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -5986,7 +6122,8 @@ static void gf8_muladd_87(void *out, void *in) } } -static void gf8_muladd_88(void *out, void *in) +static void +gf8_muladd_88(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6030,7 +6167,8 @@ static void gf8_muladd_88(void *out, void *in) } } -static void gf8_muladd_89(void *out, void *in) +static void +gf8_muladd_89(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6075,7 +6213,8 @@ static void gf8_muladd_89(void *out, void *in) } } -static void gf8_muladd_8A(void *out, void *in) +static void +gf8_muladd_8A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6116,7 +6255,8 @@ static void gf8_muladd_8A(void *out, void *in) } } -static void gf8_muladd_8B(void *out, void *in) +static void +gf8_muladd_8B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6163,7 +6303,8 @@ static void gf8_muladd_8B(void *out, void *in) } } -static void gf8_muladd_8C(void *out, void *in) +static void +gf8_muladd_8C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6204,7 +6345,8 @@ static void gf8_muladd_8C(void *out, void *in) } } -static void gf8_muladd_8D(void *out, void *in) +static void +gf8_muladd_8D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6247,7 +6389,8 @@ static void gf8_muladd_8D(void *out, void *in) } } -static void gf8_muladd_8E(void *out, void *in) +static void +gf8_muladd_8E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6288,7 +6431,8 @@ static void gf8_muladd_8E(void *out, void *in) } } -static void gf8_muladd_8F(void *out, void *in) +static void +gf8_muladd_8F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6331,7 +6475,8 @@ static void gf8_muladd_8F(void *out, void *in) } } -static void gf8_muladd_90(void *out, void *in) +static void +gf8_muladd_90(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6376,7 +6521,8 @@ static void gf8_muladd_90(void *out, void *in) } } -static void gf8_muladd_91(void *out, void *in) +static void +gf8_muladd_91(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6422,7 +6568,8 @@ static void gf8_muladd_91(void *out, void *in) } } -static void gf8_muladd_92(void *out, void *in) +static void +gf8_muladd_92(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6466,7 +6613,8 @@ static void gf8_muladd_92(void *out, void *in) } } -static void gf8_muladd_93(void *out, void *in) +static void +gf8_muladd_93(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6511,7 +6659,8 @@ static void gf8_muladd_93(void *out, void *in) } } -static void gf8_muladd_94(void *out, void *in) +static void +gf8_muladd_94(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6554,7 +6703,8 @@ static void gf8_muladd_94(void *out, void *in) } } -static void gf8_muladd_95(void *out, void *in) +static void +gf8_muladd_95(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ 
-6602,7 +6752,8 @@ static void gf8_muladd_95(void *out, void *in) } } -static void gf8_muladd_96(void *out, void *in) +static void +gf8_muladd_96(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6647,7 +6798,8 @@ static void gf8_muladd_96(void *out, void *in) } } -static void gf8_muladd_97(void *out, void *in) +static void +gf8_muladd_97(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6693,7 +6845,8 @@ static void gf8_muladd_97(void *out, void *in) } } -static void gf8_muladd_98(void *out, void *in) +static void +gf8_muladd_98(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6737,7 +6890,8 @@ static void gf8_muladd_98(void *out, void *in) } } -static void gf8_muladd_99(void *out, void *in) +static void +gf8_muladd_99(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6782,7 +6936,8 @@ static void gf8_muladd_99(void *out, void *in) } } -static void gf8_muladd_9A(void *out, void *in) +static void +gf8_muladd_9A(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6827,7 +6982,8 @@ static void gf8_muladd_9A(void *out, void *in) } } -static void gf8_muladd_9B(void *out, void *in) +static void +gf8_muladd_9B(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6870,7 +7026,8 @@ static void gf8_muladd_9B(void *out, void *in) } } -static void gf8_muladd_9C(void *out, void *in) +static void +gf8_muladd_9C(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6913,7 +7070,8 @@ static void gf8_muladd_9C(void *out, void *in) } } -static void gf8_muladd_9D(void *out, void *in) +static void +gf8_muladd_9D(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6956,7 +7114,8 @@ static void gf8_muladd_9D(void *out, void *in) } } -static void gf8_muladd_9E(void *out, void *in) +static void +gf8_muladd_9E(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -6999,7 +7158,8 @@ static void gf8_muladd_9E(void *out, void *in) } } -static void gf8_muladd_9F(void *out, void *in) +static void +gf8_muladd_9F(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7042,7 +7202,8 @@ static void gf8_muladd_9F(void *out, void *in) } } -static void gf8_muladd_A0(void *out, void *in) +static void +gf8_muladd_A0(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7088,7 +7249,8 @@ static void gf8_muladd_A0(void *out, void *in) } } -static void gf8_muladd_A1(void *out, void *in) +static void +gf8_muladd_A1(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7133,7 +7295,8 @@ static void gf8_muladd_A1(void *out, void *in) } } -static void gf8_muladd_A2(void *out, void *in) +static void +gf8_muladd_A2(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7176,7 +7339,8 @@ static void gf8_muladd_A2(void *out, void *in) } } -static void gf8_muladd_A3(void *out, void *in) +static void +gf8_muladd_A3(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7220,7 +7384,8 @@ static void gf8_muladd_A3(void *out, void *in) } } -static void gf8_muladd_A4(void *out, void *in) +static void +gf8_muladd_A4(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7267,7 +7432,8 @@ static void gf8_muladd_A4(void *out, void *in) } } -static void gf8_muladd_A5(void *out, void *in) +static void +gf8_muladd_A5(void *out, void *in) { unsigned int i; uint64_t *in_ptr = 
(uint64_t *)in; @@ -7312,7 +7478,8 @@ static void gf8_muladd_A5(void *out, void *in) } } -static void gf8_muladd_A6(void *out, void *in) +static void +gf8_muladd_A6(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7353,7 +7520,8 @@ static void gf8_muladd_A6(void *out, void *in) } } -static void gf8_muladd_A7(void *out, void *in) +static void +gf8_muladd_A7(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7394,7 +7562,8 @@ static void gf8_muladd_A7(void *out, void *in) } } -static void gf8_muladd_A8(void *out, void *in) +static void +gf8_muladd_A8(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7439,7 +7608,8 @@ static void gf8_muladd_A8(void *out, void *in) } } -static void gf8_muladd_A9(void *out, void *in) +static void +gf8_muladd_A9(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7480,7 +7650,8 @@ static void gf8_muladd_A9(void *out, void *in) } } -static void gf8_muladd_AA(void *out, void *in) +static void +gf8_muladd_AA(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7525,7 +7696,8 @@ static void gf8_muladd_AA(void *out, void *in) } } -static void gf8_muladd_AB(void *out, void *in) +static void +gf8_muladd_AB(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7569,7 +7741,8 @@ static void gf8_muladd_AB(void *out, void *in) } } -static void gf8_muladd_AC(void *out, void *in) +static void +gf8_muladd_AC(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7612,7 +7785,8 @@ static void gf8_muladd_AC(void *out, void *in) } } -static void gf8_muladd_AD(void *out, void *in) +static void +gf8_muladd_AD(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7653,7 +7827,8 @@ static void gf8_muladd_AD(void *out, void *in) } } -static void gf8_muladd_AE(void *out, void *in) +static void +gf8_muladd_AE(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7698,7 +7873,8 @@ static void gf8_muladd_AE(void *out, void *in) } } -static void gf8_muladd_AF(void *out, void *in) +static void +gf8_muladd_AF(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7741,7 +7917,8 @@ static void gf8_muladd_AF(void *out, void *in) } } -static void gf8_muladd_B0(void *out, void *in) +static void +gf8_muladd_B0(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7787,7 +7964,8 @@ static void gf8_muladd_B0(void *out, void *in) } } -static void gf8_muladd_B1(void *out, void *in) +static void +gf8_muladd_B1(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7831,7 +8009,8 @@ static void gf8_muladd_B1(void *out, void *in) } } -static void gf8_muladd_B2(void *out, void *in) +static void +gf8_muladd_B2(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7878,7 +8057,8 @@ static void gf8_muladd_B2(void *out, void *in) } } -static void gf8_muladd_B3(void *out, void *in) +static void +gf8_muladd_B3(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7923,7 +8103,8 @@ static void gf8_muladd_B3(void *out, void *in) } } -static void gf8_muladd_B4(void *out, void *in) +static void +gf8_muladd_B4(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -7966,7 +8147,8 @@ static void gf8_muladd_B4(void *out, void *in) } } -static void gf8_muladd_B5(void *out, void *in) +static void +gf8_muladd_B5(void *out, void *in) { unsigned int i; 
uint64_t *in_ptr = (uint64_t *)in; @@ -8011,7 +8193,8 @@ static void gf8_muladd_B5(void *out, void *in) } } -static void gf8_muladd_B6(void *out, void *in) +static void +gf8_muladd_B6(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8057,7 +8240,8 @@ static void gf8_muladd_B6(void *out, void *in) } } -static void gf8_muladd_B7(void *out, void *in) +static void +gf8_muladd_B7(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8101,7 +8285,8 @@ static void gf8_muladd_B7(void *out, void *in) } } -static void gf8_muladd_B8(void *out, void *in) +static void +gf8_muladd_B8(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8146,7 +8331,8 @@ static void gf8_muladd_B8(void *out, void *in) } } -static void gf8_muladd_B9(void *out, void *in) +static void +gf8_muladd_B9(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8191,7 +8377,8 @@ static void gf8_muladd_B9(void *out, void *in) } } -static void gf8_muladd_BA(void *out, void *in) +static void +gf8_muladd_BA(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8236,7 +8423,8 @@ static void gf8_muladd_BA(void *out, void *in) } } -static void gf8_muladd_BB(void *out, void *in) +static void +gf8_muladd_BB(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8280,7 +8468,8 @@ static void gf8_muladd_BB(void *out, void *in) } } -static void gf8_muladd_BC(void *out, void *in) +static void +gf8_muladd_BC(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8325,7 +8514,8 @@ static void gf8_muladd_BC(void *out, void *in) } } -static void gf8_muladd_BD(void *out, void *in) +static void +gf8_muladd_BD(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8370,7 +8560,8 @@ static void gf8_muladd_BD(void *out, void *in) } } -static void gf8_muladd_BE(void *out, void *in) +static void +gf8_muladd_BE(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8413,7 +8604,8 @@ static void gf8_muladd_BE(void *out, void *in) } } -static void gf8_muladd_BF(void *out, void *in) +static void +gf8_muladd_BF(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8459,7 +8651,8 @@ static void gf8_muladd_BF(void *out, void *in) } } -static void gf8_muladd_C0(void *out, void *in) +static void +gf8_muladd_C0(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8503,7 +8696,8 @@ static void gf8_muladd_C0(void *out, void *in) } } -static void gf8_muladd_C1(void *out, void *in) +static void +gf8_muladd_C1(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8547,7 +8741,8 @@ static void gf8_muladd_C1(void *out, void *in) } } -static void gf8_muladd_C2(void *out, void *in) +static void +gf8_muladd_C2(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8591,7 +8786,8 @@ static void gf8_muladd_C2(void *out, void *in) } } -static void gf8_muladd_C3(void *out, void *in) +static void +gf8_muladd_C3(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8636,7 +8832,8 @@ static void gf8_muladd_C3(void *out, void *in) } } -static void gf8_muladd_C4(void *out, void *in) +static void +gf8_muladd_C4(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8680,7 +8877,8 @@ static void gf8_muladd_C4(void *out, void *in) } } -static void gf8_muladd_C5(void *out, void *in) +static void +gf8_muladd_C5(void *out, void *in) { 
unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8723,7 +8921,8 @@ static void gf8_muladd_C5(void *out, void *in) } } -static void gf8_muladd_C6(void *out, void *in) +static void +gf8_muladd_C6(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8771,7 +8970,8 @@ static void gf8_muladd_C6(void *out, void *in) } } -static void gf8_muladd_C7(void *out, void *in) +static void +gf8_muladd_C7(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8815,7 +9015,8 @@ static void gf8_muladd_C7(void *out, void *in) } } -static void gf8_muladd_C8(void *out, void *in) +static void +gf8_muladd_C8(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8859,7 +9060,8 @@ static void gf8_muladd_C8(void *out, void *in) } } -static void gf8_muladd_C9(void *out, void *in) +static void +gf8_muladd_C9(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8902,7 +9104,8 @@ static void gf8_muladd_C9(void *out, void *in) } } -static void gf8_muladd_CA(void *out, void *in) +static void +gf8_muladd_CA(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8948,7 +9151,8 @@ static void gf8_muladd_CA(void *out, void *in) } } -static void gf8_muladd_CB(void *out, void *in) +static void +gf8_muladd_CB(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -8993,7 +9197,8 @@ static void gf8_muladd_CB(void *out, void *in) } } -static void gf8_muladd_CC(void *out, void *in) +static void +gf8_muladd_CC(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9039,7 +9244,8 @@ static void gf8_muladd_CC(void *out, void *in) } } -static void gf8_muladd_CD(void *out, void *in) +static void +gf8_muladd_CD(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9083,7 +9289,8 @@ static void gf8_muladd_CD(void *out, void *in) } } -static void gf8_muladd_CE(void *out, void *in) +static void +gf8_muladd_CE(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9128,7 +9335,8 @@ static void gf8_muladd_CE(void *out, void *in) } } -static void gf8_muladd_CF(void *out, void *in) +static void +gf8_muladd_CF(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9173,7 +9381,8 @@ static void gf8_muladd_CF(void *out, void *in) } } -static void gf8_muladd_D0(void *out, void *in) +static void +gf8_muladd_D0(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9220,7 +9429,8 @@ static void gf8_muladd_D0(void *out, void *in) } } -static void gf8_muladd_D1(void *out, void *in) +static void +gf8_muladd_D1(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9265,7 +9475,8 @@ static void gf8_muladd_D1(void *out, void *in) } } -static void gf8_muladd_D2(void *out, void *in) +static void +gf8_muladd_D2(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9308,7 +9519,8 @@ static void gf8_muladd_D2(void *out, void *in) } } -static void gf8_muladd_D3(void *out, void *in) +static void +gf8_muladd_D3(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9354,7 +9566,8 @@ static void gf8_muladd_D3(void *out, void *in) } } -static void gf8_muladd_D4(void *out, void *in) +static void +gf8_muladd_D4(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9399,7 +9612,8 @@ static void gf8_muladd_D4(void *out, void *in) } } -static void gf8_muladd_D5(void *out, void *in) +static void +gf8_muladd_D5(void 
*out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9444,7 +9658,8 @@ static void gf8_muladd_D5(void *out, void *in) } } -static void gf8_muladd_D6(void *out, void *in) +static void +gf8_muladd_D6(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9488,7 +9703,8 @@ static void gf8_muladd_D6(void *out, void *in) } } -static void gf8_muladd_D7(void *out, void *in) +static void +gf8_muladd_D7(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9533,7 +9749,8 @@ static void gf8_muladd_D7(void *out, void *in) } } -static void gf8_muladd_D8(void *out, void *in) +static void +gf8_muladd_D8(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9577,7 +9794,8 @@ static void gf8_muladd_D8(void *out, void *in) } } -static void gf8_muladd_D9(void *out, void *in) +static void +gf8_muladd_D9(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9618,7 +9836,8 @@ static void gf8_muladd_D9(void *out, void *in) } } -static void gf8_muladd_DA(void *out, void *in) +static void +gf8_muladd_DA(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9662,7 +9881,8 @@ static void gf8_muladd_DA(void *out, void *in) } } -static void gf8_muladd_DB(void *out, void *in) +static void +gf8_muladd_DB(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9709,7 +9929,8 @@ static void gf8_muladd_DB(void *out, void *in) } } -static void gf8_muladd_DC(void *out, void *in) +static void +gf8_muladd_DC(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9755,7 +9976,8 @@ static void gf8_muladd_DC(void *out, void *in) } } -static void gf8_muladd_DD(void *out, void *in) +static void +gf8_muladd_DD(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9796,7 +10018,8 @@ static void gf8_muladd_DD(void *out, void *in) } } -static void gf8_muladd_DE(void *out, void *in) +static void +gf8_muladd_DE(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9840,7 +10063,8 @@ static void gf8_muladd_DE(void *out, void *in) } } -static void gf8_muladd_DF(void *out, void *in) +static void +gf8_muladd_DF(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9885,7 +10109,8 @@ static void gf8_muladd_DF(void *out, void *in) } } -static void gf8_muladd_E0(void *out, void *in) +static void +gf8_muladd_E0(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9929,7 +10154,8 @@ static void gf8_muladd_E0(void *out, void *in) } } -static void gf8_muladd_E1(void *out, void *in) +static void +gf8_muladd_E1(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -9975,7 +10201,8 @@ static void gf8_muladd_E1(void *out, void *in) } } -static void gf8_muladd_E2(void *out, void *in) +static void +gf8_muladd_E2(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10016,7 +10243,8 @@ static void gf8_muladd_E2(void *out, void *in) } } -static void gf8_muladd_E3(void *out, void *in) +static void +gf8_muladd_E3(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10063,7 +10291,8 @@ static void gf8_muladd_E3(void *out, void *in) } } -static void gf8_muladd_E4(void *out, void *in) +static void +gf8_muladd_E4(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10108,7 +10337,8 @@ static void gf8_muladd_E4(void *out, void *in) } } -static void gf8_muladd_E5(void *out, void *in) +static 
void +gf8_muladd_E5(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10153,7 +10383,8 @@ static void gf8_muladd_E5(void *out, void *in) } } -static void gf8_muladd_E6(void *out, void *in) +static void +gf8_muladd_E6(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10194,7 +10425,8 @@ static void gf8_muladd_E6(void *out, void *in) } } -static void gf8_muladd_E7(void *out, void *in) +static void +gf8_muladd_E7(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10241,7 +10473,8 @@ static void gf8_muladd_E7(void *out, void *in) } } -static void gf8_muladd_E8(void *out, void *in) +static void +gf8_muladd_E8(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10287,7 +10520,8 @@ static void gf8_muladd_E8(void *out, void *in) } } -static void gf8_muladd_E9(void *out, void *in) +static void +gf8_muladd_E9(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10332,7 +10566,8 @@ static void gf8_muladd_E9(void *out, void *in) } } -static void gf8_muladd_EA(void *out, void *in) +static void +gf8_muladd_EA(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10373,7 +10608,8 @@ static void gf8_muladd_EA(void *out, void *in) } } -static void gf8_muladd_EB(void *out, void *in) +static void +gf8_muladd_EB(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10417,7 +10653,8 @@ static void gf8_muladd_EB(void *out, void *in) } } -static void gf8_muladd_EC(void *out, void *in) +static void +gf8_muladd_EC(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10458,7 +10695,8 @@ static void gf8_muladd_EC(void *out, void *in) } } -static void gf8_muladd_ED(void *out, void *in) +static void +gf8_muladd_ED(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10502,7 +10740,8 @@ static void gf8_muladd_ED(void *out, void *in) } } -static void gf8_muladd_EE(void *out, void *in) +static void +gf8_muladd_EE(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10548,7 +10787,8 @@ static void gf8_muladd_EE(void *out, void *in) } } -static void gf8_muladd_EF(void *out, void *in) +static void +gf8_muladd_EF(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10592,7 +10832,8 @@ static void gf8_muladd_EF(void *out, void *in) } } -static void gf8_muladd_F0(void *out, void *in) +static void +gf8_muladd_F0(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10638,7 +10879,8 @@ static void gf8_muladd_F0(void *out, void *in) } } -static void gf8_muladd_F1(void *out, void *in) +static void +gf8_muladd_F1(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10684,7 +10926,8 @@ static void gf8_muladd_F1(void *out, void *in) } } -static void gf8_muladd_F2(void *out, void *in) +static void +gf8_muladd_F2(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10730,7 +10973,8 @@ static void gf8_muladd_F2(void *out, void *in) } } -static void gf8_muladd_F3(void *out, void *in) +static void +gf8_muladd_F3(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10774,7 +11018,8 @@ static void gf8_muladd_F3(void *out, void *in) } } -static void gf8_muladd_F4(void *out, void *in) +static void +gf8_muladd_F4(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10817,7 +11062,8 @@ static void gf8_muladd_F4(void *out, void *in) } } -static 
void gf8_muladd_F5(void *out, void *in) +static void +gf8_muladd_F5(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10860,7 +11106,8 @@ static void gf8_muladd_F5(void *out, void *in) } } -static void gf8_muladd_F6(void *out, void *in) +static void +gf8_muladd_F6(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10904,7 +11151,8 @@ static void gf8_muladd_F6(void *out, void *in) } } -static void gf8_muladd_F7(void *out, void *in) +static void +gf8_muladd_F7(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10947,7 +11195,8 @@ static void gf8_muladd_F7(void *out, void *in) } } -static void gf8_muladd_F8(void *out, void *in) +static void +gf8_muladd_F8(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -10992,7 +11241,8 @@ static void gf8_muladd_F8(void *out, void *in) } } -static void gf8_muladd_F9(void *out, void *in) +static void +gf8_muladd_F9(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -11039,7 +11289,8 @@ static void gf8_muladd_F9(void *out, void *in) } } -static void gf8_muladd_FA(void *out, void *in) +static void +gf8_muladd_FA(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -11085,7 +11336,8 @@ static void gf8_muladd_FA(void *out, void *in) } } -static void gf8_muladd_FB(void *out, void *in) +static void +gf8_muladd_FB(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -11129,7 +11381,8 @@ static void gf8_muladd_FB(void *out, void *in) } } -static void gf8_muladd_FC(void *out, void *in) +static void +gf8_muladd_FC(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -11175,7 +11428,8 @@ static void gf8_muladd_FC(void *out, void *in) } } -static void gf8_muladd_FD(void *out, void *in) +static void +gf8_muladd_FD(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -11221,7 +11475,8 @@ static void gf8_muladd_FD(void *out, void *in) } } -static void gf8_muladd_FE(void *out, void *in) +static void +gf8_muladd_FE(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -11268,7 +11523,8 @@ static void gf8_muladd_FE(void *out, void *in) } } -static void gf8_muladd_FF(void *out, void *in) +static void +gf8_muladd_FF(void *out, void *in) { unsigned int i; uint64_t *in_ptr = (uint64_t *)in; @@ -11315,75 +11571,65 @@ static void gf8_muladd_FF(void *out, void *in) } static void (*gf8_muladd[])(void *out, void *in) = { - gf8_muladd_00, gf8_muladd_01, gf8_muladd_02, gf8_muladd_03, - gf8_muladd_04, gf8_muladd_05, gf8_muladd_06, gf8_muladd_07, - gf8_muladd_08, gf8_muladd_09, gf8_muladd_0A, gf8_muladd_0B, - gf8_muladd_0C, gf8_muladd_0D, gf8_muladd_0E, gf8_muladd_0F, - gf8_muladd_10, gf8_muladd_11, gf8_muladd_12, gf8_muladd_13, - gf8_muladd_14, gf8_muladd_15, gf8_muladd_16, gf8_muladd_17, - gf8_muladd_18, gf8_muladd_19, gf8_muladd_1A, gf8_muladd_1B, - gf8_muladd_1C, gf8_muladd_1D, gf8_muladd_1E, gf8_muladd_1F, - gf8_muladd_20, gf8_muladd_21, gf8_muladd_22, gf8_muladd_23, - gf8_muladd_24, gf8_muladd_25, gf8_muladd_26, gf8_muladd_27, - gf8_muladd_28, gf8_muladd_29, gf8_muladd_2A, gf8_muladd_2B, - gf8_muladd_2C, gf8_muladd_2D, gf8_muladd_2E, gf8_muladd_2F, - gf8_muladd_30, gf8_muladd_31, gf8_muladd_32, gf8_muladd_33, - gf8_muladd_34, gf8_muladd_35, gf8_muladd_36, gf8_muladd_37, - gf8_muladd_38, gf8_muladd_39, gf8_muladd_3A, gf8_muladd_3B, - gf8_muladd_3C, gf8_muladd_3D, gf8_muladd_3E, gf8_muladd_3F, - gf8_muladd_40, gf8_muladd_41, 
gf8_muladd_42, gf8_muladd_43, - gf8_muladd_44, gf8_muladd_45, gf8_muladd_46, gf8_muladd_47, - gf8_muladd_48, gf8_muladd_49, gf8_muladd_4A, gf8_muladd_4B, - gf8_muladd_4C, gf8_muladd_4D, gf8_muladd_4E, gf8_muladd_4F, - gf8_muladd_50, gf8_muladd_51, gf8_muladd_52, gf8_muladd_53, - gf8_muladd_54, gf8_muladd_55, gf8_muladd_56, gf8_muladd_57, - gf8_muladd_58, gf8_muladd_59, gf8_muladd_5A, gf8_muladd_5B, - gf8_muladd_5C, gf8_muladd_5D, gf8_muladd_5E, gf8_muladd_5F, - gf8_muladd_60, gf8_muladd_61, gf8_muladd_62, gf8_muladd_63, - gf8_muladd_64, gf8_muladd_65, gf8_muladd_66, gf8_muladd_67, - gf8_muladd_68, gf8_muladd_69, gf8_muladd_6A, gf8_muladd_6B, - gf8_muladd_6C, gf8_muladd_6D, gf8_muladd_6E, gf8_muladd_6F, - gf8_muladd_70, gf8_muladd_71, gf8_muladd_72, gf8_muladd_73, - gf8_muladd_74, gf8_muladd_75, gf8_muladd_76, gf8_muladd_77, - gf8_muladd_78, gf8_muladd_79, gf8_muladd_7A, gf8_muladd_7B, - gf8_muladd_7C, gf8_muladd_7D, gf8_muladd_7E, gf8_muladd_7F, - gf8_muladd_80, gf8_muladd_81, gf8_muladd_82, gf8_muladd_83, - gf8_muladd_84, gf8_muladd_85, gf8_muladd_86, gf8_muladd_87, - gf8_muladd_88, gf8_muladd_89, gf8_muladd_8A, gf8_muladd_8B, - gf8_muladd_8C, gf8_muladd_8D, gf8_muladd_8E, gf8_muladd_8F, - gf8_muladd_90, gf8_muladd_91, gf8_muladd_92, gf8_muladd_93, - gf8_muladd_94, gf8_muladd_95, gf8_muladd_96, gf8_muladd_97, - gf8_muladd_98, gf8_muladd_99, gf8_muladd_9A, gf8_muladd_9B, - gf8_muladd_9C, gf8_muladd_9D, gf8_muladd_9E, gf8_muladd_9F, - gf8_muladd_A0, gf8_muladd_A1, gf8_muladd_A2, gf8_muladd_A3, - gf8_muladd_A4, gf8_muladd_A5, gf8_muladd_A6, gf8_muladd_A7, - gf8_muladd_A8, gf8_muladd_A9, gf8_muladd_AA, gf8_muladd_AB, - gf8_muladd_AC, gf8_muladd_AD, gf8_muladd_AE, gf8_muladd_AF, - gf8_muladd_B0, gf8_muladd_B1, gf8_muladd_B2, gf8_muladd_B3, - gf8_muladd_B4, gf8_muladd_B5, gf8_muladd_B6, gf8_muladd_B7, - gf8_muladd_B8, gf8_muladd_B9, gf8_muladd_BA, gf8_muladd_BB, - gf8_muladd_BC, gf8_muladd_BD, gf8_muladd_BE, gf8_muladd_BF, - gf8_muladd_C0, gf8_muladd_C1, gf8_muladd_C2, gf8_muladd_C3, - gf8_muladd_C4, gf8_muladd_C5, gf8_muladd_C6, gf8_muladd_C7, - gf8_muladd_C8, gf8_muladd_C9, gf8_muladd_CA, gf8_muladd_CB, - gf8_muladd_CC, gf8_muladd_CD, gf8_muladd_CE, gf8_muladd_CF, - gf8_muladd_D0, gf8_muladd_D1, gf8_muladd_D2, gf8_muladd_D3, - gf8_muladd_D4, gf8_muladd_D5, gf8_muladd_D6, gf8_muladd_D7, - gf8_muladd_D8, gf8_muladd_D9, gf8_muladd_DA, gf8_muladd_DB, - gf8_muladd_DC, gf8_muladd_DD, gf8_muladd_DE, gf8_muladd_DF, - gf8_muladd_E0, gf8_muladd_E1, gf8_muladd_E2, gf8_muladd_E3, - gf8_muladd_E4, gf8_muladd_E5, gf8_muladd_E6, gf8_muladd_E7, - gf8_muladd_E8, gf8_muladd_E9, gf8_muladd_EA, gf8_muladd_EB, - gf8_muladd_EC, gf8_muladd_ED, gf8_muladd_EE, gf8_muladd_EF, - gf8_muladd_F0, gf8_muladd_F1, gf8_muladd_F2, gf8_muladd_F3, - gf8_muladd_F4, gf8_muladd_F5, gf8_muladd_F6, gf8_muladd_F7, - gf8_muladd_F8, gf8_muladd_F9, gf8_muladd_FA, gf8_muladd_FB, - gf8_muladd_FC, gf8_muladd_FD, gf8_muladd_FE, gf8_muladd_FF + gf8_muladd_00, gf8_muladd_01, gf8_muladd_02, gf8_muladd_03, gf8_muladd_04, + gf8_muladd_05, gf8_muladd_06, gf8_muladd_07, gf8_muladd_08, gf8_muladd_09, + gf8_muladd_0A, gf8_muladd_0B, gf8_muladd_0C, gf8_muladd_0D, gf8_muladd_0E, + gf8_muladd_0F, gf8_muladd_10, gf8_muladd_11, gf8_muladd_12, gf8_muladd_13, + gf8_muladd_14, gf8_muladd_15, gf8_muladd_16, gf8_muladd_17, gf8_muladd_18, + gf8_muladd_19, gf8_muladd_1A, gf8_muladd_1B, gf8_muladd_1C, gf8_muladd_1D, + gf8_muladd_1E, gf8_muladd_1F, gf8_muladd_20, gf8_muladd_21, gf8_muladd_22, + gf8_muladd_23, gf8_muladd_24, gf8_muladd_25, gf8_muladd_26, 
gf8_muladd_27, + gf8_muladd_28, gf8_muladd_29, gf8_muladd_2A, gf8_muladd_2B, gf8_muladd_2C, + gf8_muladd_2D, gf8_muladd_2E, gf8_muladd_2F, gf8_muladd_30, gf8_muladd_31, + gf8_muladd_32, gf8_muladd_33, gf8_muladd_34, gf8_muladd_35, gf8_muladd_36, + gf8_muladd_37, gf8_muladd_38, gf8_muladd_39, gf8_muladd_3A, gf8_muladd_3B, + gf8_muladd_3C, gf8_muladd_3D, gf8_muladd_3E, gf8_muladd_3F, gf8_muladd_40, + gf8_muladd_41, gf8_muladd_42, gf8_muladd_43, gf8_muladd_44, gf8_muladd_45, + gf8_muladd_46, gf8_muladd_47, gf8_muladd_48, gf8_muladd_49, gf8_muladd_4A, + gf8_muladd_4B, gf8_muladd_4C, gf8_muladd_4D, gf8_muladd_4E, gf8_muladd_4F, + gf8_muladd_50, gf8_muladd_51, gf8_muladd_52, gf8_muladd_53, gf8_muladd_54, + gf8_muladd_55, gf8_muladd_56, gf8_muladd_57, gf8_muladd_58, gf8_muladd_59, + gf8_muladd_5A, gf8_muladd_5B, gf8_muladd_5C, gf8_muladd_5D, gf8_muladd_5E, + gf8_muladd_5F, gf8_muladd_60, gf8_muladd_61, gf8_muladd_62, gf8_muladd_63, + gf8_muladd_64, gf8_muladd_65, gf8_muladd_66, gf8_muladd_67, gf8_muladd_68, + gf8_muladd_69, gf8_muladd_6A, gf8_muladd_6B, gf8_muladd_6C, gf8_muladd_6D, + gf8_muladd_6E, gf8_muladd_6F, gf8_muladd_70, gf8_muladd_71, gf8_muladd_72, + gf8_muladd_73, gf8_muladd_74, gf8_muladd_75, gf8_muladd_76, gf8_muladd_77, + gf8_muladd_78, gf8_muladd_79, gf8_muladd_7A, gf8_muladd_7B, gf8_muladd_7C, + gf8_muladd_7D, gf8_muladd_7E, gf8_muladd_7F, gf8_muladd_80, gf8_muladd_81, + gf8_muladd_82, gf8_muladd_83, gf8_muladd_84, gf8_muladd_85, gf8_muladd_86, + gf8_muladd_87, gf8_muladd_88, gf8_muladd_89, gf8_muladd_8A, gf8_muladd_8B, + gf8_muladd_8C, gf8_muladd_8D, gf8_muladd_8E, gf8_muladd_8F, gf8_muladd_90, + gf8_muladd_91, gf8_muladd_92, gf8_muladd_93, gf8_muladd_94, gf8_muladd_95, + gf8_muladd_96, gf8_muladd_97, gf8_muladd_98, gf8_muladd_99, gf8_muladd_9A, + gf8_muladd_9B, gf8_muladd_9C, gf8_muladd_9D, gf8_muladd_9E, gf8_muladd_9F, + gf8_muladd_A0, gf8_muladd_A1, gf8_muladd_A2, gf8_muladd_A3, gf8_muladd_A4, + gf8_muladd_A5, gf8_muladd_A6, gf8_muladd_A7, gf8_muladd_A8, gf8_muladd_A9, + gf8_muladd_AA, gf8_muladd_AB, gf8_muladd_AC, gf8_muladd_AD, gf8_muladd_AE, + gf8_muladd_AF, gf8_muladd_B0, gf8_muladd_B1, gf8_muladd_B2, gf8_muladd_B3, + gf8_muladd_B4, gf8_muladd_B5, gf8_muladd_B6, gf8_muladd_B7, gf8_muladd_B8, + gf8_muladd_B9, gf8_muladd_BA, gf8_muladd_BB, gf8_muladd_BC, gf8_muladd_BD, + gf8_muladd_BE, gf8_muladd_BF, gf8_muladd_C0, gf8_muladd_C1, gf8_muladd_C2, + gf8_muladd_C3, gf8_muladd_C4, gf8_muladd_C5, gf8_muladd_C6, gf8_muladd_C7, + gf8_muladd_C8, gf8_muladd_C9, gf8_muladd_CA, gf8_muladd_CB, gf8_muladd_CC, + gf8_muladd_CD, gf8_muladd_CE, gf8_muladd_CF, gf8_muladd_D0, gf8_muladd_D1, + gf8_muladd_D2, gf8_muladd_D3, gf8_muladd_D4, gf8_muladd_D5, gf8_muladd_D6, + gf8_muladd_D7, gf8_muladd_D8, gf8_muladd_D9, gf8_muladd_DA, gf8_muladd_DB, + gf8_muladd_DC, gf8_muladd_DD, gf8_muladd_DE, gf8_muladd_DF, gf8_muladd_E0, + gf8_muladd_E1, gf8_muladd_E2, gf8_muladd_E3, gf8_muladd_E4, gf8_muladd_E5, + gf8_muladd_E6, gf8_muladd_E7, gf8_muladd_E8, gf8_muladd_E9, gf8_muladd_EA, + gf8_muladd_EB, gf8_muladd_EC, gf8_muladd_ED, gf8_muladd_EE, gf8_muladd_EF, + gf8_muladd_F0, gf8_muladd_F1, gf8_muladd_F2, gf8_muladd_F3, gf8_muladd_F4, + gf8_muladd_F5, gf8_muladd_F6, gf8_muladd_F7, gf8_muladd_F8, gf8_muladd_F9, + gf8_muladd_FA, gf8_muladd_FB, gf8_muladd_FC, gf8_muladd_FD, gf8_muladd_FE, + gf8_muladd_FF}; + +static uint64_t zero[EC_METHOD_WORD_SIZE * 8] = { + 0, }; -static uint64_t zero[EC_METHOD_WORD_SIZE * 8] = {0, }; - -void ec_code_c_prepare(ec_gf_t *gf, uint32_t *values, uint32_t count) +void 
+ec_code_c_prepare(ec_gf_t *gf, uint32_t *values, uint32_t count) { uint32_t i, last, tmp; @@ -11397,8 +11643,9 @@ void ec_code_c_prepare(ec_gf_t *gf, uint32_t *values, uint32_t count) } } -void ec_code_c_linear(void *dst, void *src, uint64_t offset, uint32_t *values, - uint32_t count) +void +ec_code_c_linear(void *dst, void *src, uint64_t offset, uint32_t *values, + uint32_t count) { src += offset; gf8_muladd_00(dst, src); @@ -11409,8 +11656,9 @@ void ec_code_c_linear(void *dst, void *src, uint64_t offset, uint32_t *values, } } -void ec_code_c_interleaved(void *dst, void **src, uint64_t offset, - uint32_t *values, uint32_t count) +void +ec_code_c_interleaved(void *dst, void **src, uint64_t offset, uint32_t *values, + uint32_t count) { uint32_t i, last, tmp; diff --git a/xlators/cluster/ec/src/ec-code-intel.c b/xlators/cluster/ec/src/ec-code-intel.c index b9fdcad4421..f1c4e13e321 100644 --- a/xlators/cluster/ec/src/ec-code-intel.c +++ b/xlators/cluster/ec/src/ec-code-intel.c @@ -71,16 +71,15 @@ ec_code_intel_vex(ec_code_intel_t *intel, gf_boolean_t w, gf_boolean_t l, uint32_t reg) { ec_code_intel_rex(intel, w); - if (((intel->rex.w == 1) || - (intel->rex.x == 0) || - (intel->rex.b == 0)) || + if (((intel->rex.w == 1) || (intel->rex.x == 0) || (intel->rex.b == 0)) || ((opcode != VEX_OPCODE_NONE) && (opcode != VEX_OPCODE_0F))) { intel->rex.present = _gf_false; intel->vex.bytes = 3; intel->vex.data[0] = 0xC4; intel->vex.data[1] = ((intel->rex.r << 7) | (intel->rex.x << 6) | - (intel->rex.b << 5) | opcode) ^ 0xE0; + (intel->rex.b << 5) | opcode) ^ + 0xE0; intel->vex.data[2] = (intel->rex.w << 7) | ((~reg & 0x0F) << 3) | (l ? 0x04 : 0x00) | prefix; } else { @@ -214,22 +213,17 @@ ec_code_intel_emit(ec_code_builder_t *builder, ec_code_intel_t *intel) insn[count++] = intel->vex.data[i]; } if (intel->rex.present) { - insn[count++] = 0x40 | - (intel->rex.w << 3) | - (intel->rex.r << 2) | - (intel->rex.x << 1) | - (intel->rex.b << 0); + insn[count++] = 0x40 | (intel->rex.w << 3) | (intel->rex.r << 2) | + (intel->rex.x << 1) | (intel->rex.b << 0); } for (i = 0; i < intel->opcode.bytes; i++) { insn[count++] = intel->opcode.data[i]; } if (intel->modrm.present) { - insn[count++] = (intel->modrm.mod << 6) | - (intel->modrm.reg << 3) | + insn[count++] = (intel->modrm.mod << 6) | (intel->modrm.reg << 3) | (intel->modrm.rm << 0); if (intel->sib.present) { - insn[count++] = (intel->sib.scale << 6) | - (intel->sib.index << 3) | + insn[count++] = (intel->sib.scale << 6) | (intel->sib.index << 3) | (intel->sib.base << 0); } } @@ -467,9 +461,9 @@ ec_code_intel_op_mov_sse2m(ec_code_builder_t *builder, uint32_t src, } void -ec_code_intel_op_mov_m2sse(ec_code_builder_t *builder, - ec_code_intel_reg_t base, ec_code_intel_reg_t index, - uint32_t scale, int32_t offset, uint32_t dst) +ec_code_intel_op_mov_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base, + ec_code_intel_reg_t index, uint32_t scale, + int32_t offset, uint32_t dst) { ec_code_intel_t intel; @@ -500,9 +494,9 @@ ec_code_intel_op_xor_sse2sse(ec_code_builder_t *builder, uint32_t src, } void -ec_code_intel_op_xor_m2sse(ec_code_builder_t *builder, - ec_code_intel_reg_t base, ec_code_intel_reg_t index, - uint32_t scale, int32_t offset, uint32_t dst) +ec_code_intel_op_xor_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base, + ec_code_intel_reg_t index, uint32_t scale, + int32_t offset, uint32_t dst) { ec_code_intel_t intel; @@ -526,8 +520,8 @@ ec_code_intel_op_mov_avx2avx(ec_code_builder_t *builder, uint32_t src, ec_code_intel_modrm_reg(&intel, 
src, dst); ec_code_intel_op_1(&intel, 0x6F, 0); - ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, - VEX_PREFIX_66, VEX_REG_NONE); + ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66, + VEX_REG_NONE); ec_code_intel_emit(builder, &intel); } @@ -543,16 +537,16 @@ ec_code_intel_op_mov_avx2m(ec_code_builder_t *builder, uint32_t src, ec_code_intel_modrm_mem(&intel, src, base, index, scale, offset); ec_code_intel_op_1(&intel, 0x7F, 0); - ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, - VEX_PREFIX_66, VEX_REG_NONE); + ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66, + VEX_REG_NONE); ec_code_intel_emit(builder, &intel); } void -ec_code_intel_op_mov_m2avx(ec_code_builder_t *builder, - ec_code_intel_reg_t base, ec_code_intel_reg_t index, - uint32_t scale, int32_t offset, uint32_t dst) +ec_code_intel_op_mov_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base, + ec_code_intel_reg_t index, uint32_t scale, + int32_t offset, uint32_t dst) { ec_code_intel_t intel; @@ -560,8 +554,8 @@ ec_code_intel_op_mov_m2avx(ec_code_builder_t *builder, ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset); ec_code_intel_op_1(&intel, 0x6F, 0); - ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, - VEX_PREFIX_66, VEX_REG_NONE); + ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66, + VEX_REG_NONE); ec_code_intel_emit(builder, &intel); } @@ -576,16 +570,16 @@ ec_code_intel_op_xor_avx2avx(ec_code_builder_t *builder, uint32_t src, ec_code_intel_modrm_reg(&intel, src, dst); ec_code_intel_op_1(&intel, 0xEF, 0); - ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, - VEX_PREFIX_66, dst); + ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66, + dst); ec_code_intel_emit(builder, &intel); } void -ec_code_intel_op_xor_m2avx(ec_code_builder_t *builder, - ec_code_intel_reg_t base, ec_code_intel_reg_t index, - uint32_t scale, int32_t offset, uint32_t dst) +ec_code_intel_op_xor_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base, + ec_code_intel_reg_t index, uint32_t scale, + int32_t offset, uint32_t dst) { ec_code_intel_t intel; @@ -593,8 +587,8 @@ ec_code_intel_op_xor_m2avx(ec_code_builder_t *builder, ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset); ec_code_intel_op_1(&intel, 0xEF, 0); - ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, - VEX_PREFIX_66, dst); + ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66, + dst); ec_code_intel_emit(builder, &intel); } diff --git a/xlators/cluster/ec/src/ec-code-sse.c b/xlators/cluster/ec/src/ec-code-sse.c index 6f2c6fa593f..e11e7ff8400 100644 --- a/xlators/cluster/ec/src/ec-code-sse.c +++ b/xlators/cluster/ec/src/ec-code-sse.c @@ -34,10 +34,9 @@ ec_code_sse_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, uint32_t bit) { if (builder->linear) { - ec_code_intel_op_mov_m2sse(builder, REG_SI, REG_DX, 1, - idx * builder->width * builder->bits + - bit * builder->width, - dst); + ec_code_intel_op_mov_m2sse( + builder, REG_SI, REG_DX, 1, + idx * builder->width * builder->bits + bit * builder->width, dst); } else { if (builder->base != idx) { ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8, @@ -73,10 +72,9 @@ ec_code_sse_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, uint32_t bit) { if (builder->linear) { - ec_code_intel_op_xor_m2sse(builder, REG_SI, REG_DX, 1, - idx * builder->width * builder->bits + - bit * builder->width, - dst); + 
ec_code_intel_op_xor_m2sse( + builder, REG_SI, REG_DX, 1, + idx * builder->width * builder->bits + bit * builder->width, dst); } else { if (builder->base != idx) { ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8, @@ -88,21 +86,16 @@ ec_code_sse_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, } } -static char *ec_code_sse_needed_flags[] = { - "sse2", - NULL -}; +static char *ec_code_sse_needed_flags[] = {"sse2", NULL}; -ec_code_gen_t ec_code_gen_sse = { - .name = "sse", - .flags = ec_code_sse_needed_flags, - .width = 16, - .prolog = ec_code_sse_prolog, - .epilog = ec_code_sse_epilog, - .load = ec_code_sse_load, - .store = ec_code_sse_store, - .copy = ec_code_sse_copy, - .xor2 = ec_code_sse_xor2, - .xor3 = NULL, - .xorm = ec_code_sse_xorm -}; +ec_code_gen_t ec_code_gen_sse = {.name = "sse", + .flags = ec_code_sse_needed_flags, + .width = 16, + .prolog = ec_code_sse_prolog, + .epilog = ec_code_sse_epilog, + .load = ec_code_sse_load, + .store = ec_code_sse_store, + .copy = ec_code_sse_copy, + .xor2 = ec_code_sse_xor2, + .xor3 = NULL, + .xorm = ec_code_sse_xorm}; diff --git a/xlators/cluster/ec/src/ec-code-x64.c b/xlators/cluster/ec/src/ec-code-x64.c index cfec4b3be7a..26565b4493f 100644 --- a/xlators/cluster/ec/src/ec-code-x64.c +++ b/xlators/cluster/ec/src/ec-code-x64.c @@ -14,8 +14,7 @@ static ec_code_intel_reg_t ec_code_x64_regmap[] = { REG_AX, REG_CX, REG_BP, REG_8, REG_9, REG_10, - REG_11, REG_12, REG_13, REG_14, REG_15 -}; + REG_11, REG_12, REG_13, REG_14, REG_15}; static void ec_code_x64_prolog(ec_code_builder_t *builder) @@ -68,10 +67,9 @@ ec_code_x64_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, dst = ec_code_x64_regmap[dst]; if (builder->linear) { - ec_code_intel_op_mov_m2r(builder, REG_SI, REG_DX, 1, - idx * builder->width * builder->bits + - bit * builder->width, - dst); + ec_code_intel_op_mov_m2r( + builder, REG_SI, REG_DX, 1, + idx * builder->width * builder->bits + bit * builder->width, dst); } else { if (builder->base != idx) { ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8, @@ -117,10 +115,9 @@ ec_code_x64_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, dst = ec_code_x64_regmap[dst]; if (builder->linear) { - ec_code_intel_op_xor_m2r(builder, REG_SI, REG_DX, 1, - idx * builder->width * builder->bits + - bit * builder->width, - dst); + ec_code_intel_op_xor_m2r( + builder, REG_SI, REG_DX, 1, + idx * builder->width * builder->bits + bit * builder->width, dst); } else { if (builder->base != idx) { ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8, @@ -132,20 +129,16 @@ ec_code_x64_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx, } } -static char *ec_code_x64_needed_flags[] = { - NULL -}; - -ec_code_gen_t ec_code_gen_x64 = { - .name = "x64", - .flags = ec_code_x64_needed_flags, - .width = sizeof(uint64_t), - .prolog = ec_code_x64_prolog, - .epilog = ec_code_x64_epilog, - .load = ec_code_x64_load, - .store = ec_code_x64_store, - .copy = ec_code_x64_copy, - .xor2 = ec_code_x64_xor2, - .xor3 = NULL, - .xorm = ec_code_x64_xorm -}; +static char *ec_code_x64_needed_flags[] = {NULL}; + +ec_code_gen_t ec_code_gen_x64 = {.name = "x64", + .flags = ec_code_x64_needed_flags, + .width = sizeof(uint64_t), + .prolog = ec_code_x64_prolog, + .epilog = ec_code_x64_epilog, + .load = ec_code_x64_load, + .store = ec_code_x64_store, + .copy = ec_code_x64_copy, + .xor2 = ec_code_x64_xor2, + .xor3 = NULL, + .xorm = ec_code_x64_xorm}; diff --git a/xlators/cluster/ec/src/ec-code.c 
b/xlators/cluster/ec/src/ec-code.c index e33cb42d9dc..70878d794ca 100644 --- a/xlators/cluster/ec/src/ec-code.c +++ b/xlators/cluster/ec/src/ec-code.c @@ -47,13 +47,13 @@ struct _ec_code_proc; typedef struct _ec_code_proc ec_code_proc_t; struct _ec_code_proc { - int32_t fd; + int32_t fd; gf_boolean_t eof; gf_boolean_t error; gf_boolean_t skip; - ssize_t size; - ssize_t pos; - char buffer[EC_PROC_BUFFER_SIZE]; + ssize_t size; + ssize_t pos; + char buffer[EC_PROC_BUFFER_SIZE]; }; static ec_code_gen_t *ec_code_gen_table[] = { @@ -66,8 +66,7 @@ static ec_code_gen_t *ec_code_gen_table[] = { #ifdef USE_EC_DYNAMIC_X64 &ec_code_gen_x64, #endif - NULL -}; + NULL}; static void ec_code_arg_set(ec_code_arg_t *arg, uint32_t value) @@ -84,7 +83,6 @@ ec_code_arg_assign(ec_code_builder_t *builder, ec_code_op_t *op, if (builder->regs <= reg) { builder->regs = reg + 1; } - } static void @@ -202,17 +200,17 @@ static void ec_code_dup(ec_code_builder_t *builder, ec_gf_op_t *op) { switch (op->op) { - case EC_GF_OP_COPY: - ec_code_copy(builder, op->arg1, op->arg2); - break; - case EC_GF_OP_XOR2: - ec_code_xor2(builder, op->arg1, op->arg2); - break; - case EC_GF_OP_XOR3: - ec_code_xor3(builder, op->arg1, op->arg2, op->arg3); - break; - default: - break; + case EC_GF_OP_COPY: + ec_code_copy(builder, op->arg1, op->arg2); + break; + case EC_GF_OP_XOR2: + ec_code_xor2(builder, op->arg1, op->arg2); + break; + case EC_GF_OP_XOR3: + ec_code_xor3(builder, op->arg1, op->arg2, op->arg3); + break; + default: + break; } } @@ -285,8 +283,9 @@ ec_code_prepare(ec_code_t *code, uint32_t count, uint32_t width, count *= code->gf->bits + code->gf->max_ops; count += code->gf->bits; - builder = GF_MALLOC(sizeof(ec_code_builder_t) + - sizeof(ec_code_op_t) * count, ec_mt_ec_code_builder_t); + builder = GF_MALLOC( + sizeof(ec_code_builder_t) + sizeof(ec_code_op_t) * count, + ec_mt_ec_code_builder_t); if (builder == NULL) { return EC_ERR(ENOMEM); } @@ -331,15 +330,15 @@ ec_code_chunk_from_space(ec_code_space_t *space) static void * ec_code_to_executable(ec_code_space_t *space, void *addr) { - return (void *)((uintptr_t)addr - (uintptr_t)space - + (uintptr_t)space->exec); + return (void *)((uintptr_t)addr - (uintptr_t)space + + (uintptr_t)space->exec); } static void * ec_code_from_executable(ec_code_space_t *space, void *addr) { - return (void *)((uintptr_t)addr - (uintptr_t)space->exec - + (uintptr_t)space); + return (void *)((uintptr_t)addr - (uintptr_t)space->exec + + (uintptr_t)space); } static void * @@ -395,105 +394,105 @@ ec_code_chunk_touch(ec_code_chunk_t *prev, ec_code_chunk_t *next) static ec_code_space_t * ec_code_space_create(ec_code_t *code, size_t size) { - char path[] = GLUSTERFS_LIBEXECDIR "/ec-code-dynamic.XXXXXX"; - ec_code_space_t *space; - void *exec; - int32_t fd, err; - - /* We need to create memory areas to store the generated dynamic code. - * Obviously these areas need to be written to be able to create the - * code and they also need to be executable to execute it. - * - * However it's a bad practice to have a memory region that is both - * writable *and* executable. In fact, selinux forbids this and causes - * attempts to do so to fail (unless specifically configured). - * - * To solve the problem we'll use two distinct memory areas mapped to - * the same physical storage. One of the memory areas will have write - * permission, and the other will have execute permission. Both areas - * will have the same contents. The physical storage will be a regular - * file that will be mmapped to both areas. 
- */ - - /* We need to create a temporary file as the backend storage for the - * memory mapped areas. */ - /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ - fd = mkstemp(path); - if (fd < 0) { - err = errno; - gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, - "Unable to create a temporary file for the ec dynamic " - "code"); - space = EC_ERR(err); - goto done; - } - /* Once created we don't need to keep it in the file system. It will - * still exist until we close the last file descriptor or unmap the - * memory areas bound to the file. */ - sys_unlink(path); - - size = (size + EC_CODE_ALIGN - 1) & ~(EC_CODE_ALIGN - 1); - if (sys_ftruncate(fd, size) < 0) { - err = errno; - gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, - "Unable to resize the file for the ec dynamic code"); - space = EC_ERR(err); - goto done_close; - } + char path[] = GLUSTERFS_LIBEXECDIR "/ec-code-dynamic.XXXXXX"; + ec_code_space_t *space; + void *exec; + int32_t fd, err; + + /* We need to create memory areas to store the generated dynamic code. + * Obviously these areas need to be written to be able to create the + * code and they also need to be executable to execute it. + * + * However it's a bad practice to have a memory region that is both + * writable *and* executable. In fact, selinux forbids this and causes + * attempts to do so to fail (unless specifically configured). + * + * To solve the problem we'll use two distinct memory areas mapped to + * the same physical storage. One of the memory areas will have write + * permission, and the other will have execute permission. Both areas + * will have the same contents. The physical storage will be a regular + * file that will be mmapped to both areas. + */ + + /* We need to create a temporary file as the backend storage for the + * memory mapped areas. */ + /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ + fd = mkstemp(path); + if (fd < 0) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to create a temporary file for the ec dynamic " + "code"); + space = EC_ERR(err); + goto done; + } + /* Once created we don't need to keep it in the file system. It will + * still exist until we close the last file descriptor or unmap the + * memory areas bound to the file. */ + sys_unlink(path); + + size = (size + EC_CODE_ALIGN - 1) & ~(EC_CODE_ALIGN - 1); + if (sys_ftruncate(fd, size) < 0) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to resize the file for the ec dynamic code"); + space = EC_ERR(err); + goto done_close; + } - /* This creates an executable memory area to be able to run the - * generated fragments of code. */ - exec = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); - if (exec == MAP_FAILED) { - err = errno; - gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, - "Unable to map the executable area for the ec dynamic " - "code"); - space = EC_ERR(err); - goto done_close; - } - /* It's not important to check the return value of mlock(). If it fails - * everything will continue to work normally. */ - mlock(exec, size); - - /* This maps a read/write memory area to be able to create the dynamici - * code. 
*/ - space = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (space == MAP_FAILED) { - err = errno; - gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, - "Unable to map the writable area for the ec dynamic " - "code"); - space = EC_ERR(err); - - munmap(exec, size); - - goto done_close; - } + /* This creates an executable memory area to be able to run the + * generated fragments of code. */ + exec = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + if (exec == MAP_FAILED) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to map the executable area for the ec dynamic " + "code"); + space = EC_ERR(err); + goto done_close; + } + /* It's not important to check the return value of mlock(). If it fails + * everything will continue to work normally. */ + mlock(exec, size); + + /* This maps a read/write memory area to be able to create the dynamici + * code. */ + space = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (space == MAP_FAILED) { + err = errno; + gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED, + "Unable to map the writable area for the ec dynamic " + "code"); + space = EC_ERR(err); + + munmap(exec, size); + + goto done_close; + } - space->exec = exec; - space->size = size; - space->code = code; - list_add_tail(&space->list, &code->spaces); - INIT_LIST_HEAD(&space->chunks); + space->exec = exec; + space->size = size; + space->code = code; + list_add_tail(&space->list, &code->spaces); + INIT_LIST_HEAD(&space->chunks); done_close: - /* If everything has succeeded, we already have the memory areas - * mapped. We don't need the file descriptor anymore because the - * backend storage will be there until the mmap()'d regions are - * unmapped. */ - sys_close(fd); + /* If everything has succeeded, we already have the memory areas + * mapped. We don't need the file descriptor anymore because the + * backend storage will be there until the mmap()'d regions are + * unmapped. */ + sys_close(fd); done: - return space; + return space; } static void ec_code_space_destroy(ec_code_space_t *space) { - list_del_init(&space->list); + list_del_init(&space->list); - munmap(space->exec, space->size); - munmap(space, space->size); + munmap(space->exec, space->size); + munmap(space, space->size); } static void @@ -501,7 +500,8 @@ ec_code_chunk_merge(ec_code_chunk_t *chunk) { ec_code_chunk_t *item, *tmp; - list_for_each_entry_safe(item, tmp, &chunk->space->chunks, list) { + list_for_each_entry_safe(item, tmp, &chunk->space->chunks, list) + { if ((uintptr_t)item > (uintptr_t)chunk) { list_add_tail(&chunk->list, &item->list); if (ec_code_chunk_touch(chunk, item)) { @@ -520,8 +520,8 @@ ec_code_chunk_merge(ec_code_chunk_t *chunk) list_add_tail(&chunk->list, &chunk->space->chunks); check: - if (chunk->size == chunk->space->size - ec_code_space_size() - - ec_code_chunk_size()) { + if (chunk->size == + chunk->space->size - ec_code_space_size() - ec_code_chunk_size()) { ec_code_space_destroy(chunk->space); } } @@ -536,9 +536,12 @@ ec_code_space_alloc(ec_code_t *code, size_t size) /* To minimize fragmentation, we only allocate chunks of sizes multiples * of EC_CODE_CHUNK_MIN_SIZE. 
*/ size = ((size + ec_code_chunk_size() + EC_CODE_CHUNK_MIN_SIZE - 1) & - ~(EC_CODE_CHUNK_MIN_SIZE - 1)) - ec_code_chunk_size(); - list_for_each_entry(space, &code->spaces, list) { - list_for_each_entry(chunk, &space->chunks, list) { + ~(EC_CODE_CHUNK_MIN_SIZE - 1)) - + ec_code_chunk_size(); + list_for_each_entry(space, &code->spaces, list) + { + list_for_each_entry(chunk, &space->chunks, list) + { if (chunk->size >= size) { goto out; } @@ -608,26 +611,29 @@ ec_code_write(ec_code_builder_t *builder) for (i = 0; i < builder->count; i++) { op = &builder->ops[i]; switch (op->op) { - case EC_GF_OP_LOAD: - gen->load(builder, op->arg1.value, op->arg2.value, op->arg3.value); - break; - case EC_GF_OP_STORE: - gen->store(builder, op->arg1.value, op->arg3.value); - break; - case EC_GF_OP_COPY: - gen->copy(builder, op->arg1.value, op->arg2.value); - break; - case EC_GF_OP_XOR2: - gen->xor2(builder, op->arg1.value, op->arg2.value); - break; - case EC_GF_OP_XOR3: - gen->xor3(builder, op->arg1.value, op->arg2.value, op->arg3.value); - break; - case EC_GF_OP_XORM: - gen->xorm(builder, op->arg1.value, op->arg2.value, op->arg3.value); - break; - default: - break; + case EC_GF_OP_LOAD: + gen->load(builder, op->arg1.value, op->arg2.value, + op->arg3.value); + break; + case EC_GF_OP_STORE: + gen->store(builder, op->arg1.value, op->arg3.value); + break; + case EC_GF_OP_COPY: + gen->copy(builder, op->arg1.value, op->arg2.value); + break; + case EC_GF_OP_XOR2: + gen->xor2(builder, op->arg1.value, op->arg2.value); + break; + case EC_GF_OP_XOR3: + gen->xor3(builder, op->arg1.value, op->arg2.value, + op->arg3.value); + break; + case EC_GF_OP_XORM: + gen->xorm(builder, op->arg1.value, op->arg2.value, + op->arg3.value); + break; + default: + break; } } gen->epilog(builder); @@ -716,67 +722,65 @@ static void * ec_code_build_dynamic(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count, gf_boolean_t linear) { - ec_code_builder_t *builder; - uint32_t offset, val, next; + ec_code_builder_t *builder; + uint32_t offset, val, next; - builder = ec_code_prepare(code, count, width, linear); - if (EC_IS_ERR(builder)) { - return builder; - } + builder = ec_code_prepare(code, count, width, linear); + if (EC_IS_ERR(builder)) { + return builder; + } - offset = -1; - next = ec_code_value_next(values, count, &offset); - if (next != 0) { - ec_code_gf_load(builder, offset); - do { - val = next; - next = ec_code_value_next(values, count, &offset); - if (next != 0) { - ec_code_gf_mul(builder, ec_gf_div(code->gf, - val, next)); - ec_code_gf_load_xor(builder, offset); - } - } while (next != 0); - ec_code_gf_mul(builder, val); - ec_code_gf_store(builder); - } else { - ec_code_gf_clear(builder); - } + offset = -1; + next = ec_code_value_next(values, count, &offset); + if (next != 0) { + ec_code_gf_load(builder, offset); + do { + val = next; + next = ec_code_value_next(values, count, &offset); + if (next != 0) { + ec_code_gf_mul(builder, ec_gf_div(code->gf, val, next)); + ec_code_gf_load_xor(builder, offset); + } + } while (next != 0); + ec_code_gf_mul(builder, val); + ec_code_gf_store(builder); + } else { + ec_code_gf_clear(builder); + } - return ec_code_compile(builder); + return ec_code_compile(builder); } static void * -ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, - uint32_t count, gf_boolean_t linear) +ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count, + gf_boolean_t linear) { - void *func; + void *func; - if (code->gen != NULL) { - func = ec_code_build_dynamic(code, width, 
values, count, - linear); - if (!EC_IS_ERR(func)) { - return func; - } + if (code->gen != NULL) { + func = ec_code_build_dynamic(code, width, values, count, linear); + if (!EC_IS_ERR(func)) { + return func; + } - gf_msg_debug(THIS->name, GF_LOG_DEBUG, - "Unable to generate dynamic code. Falling back " - "to precompiled code"); + gf_msg_debug(THIS->name, GF_LOG_DEBUG, + "Unable to generate dynamic code. Falling back " + "to precompiled code"); - /* The dynamic code generation shouldn't fail in normal - * conditions, but if it fails at some point, it's very - * probable that it will fail again, so we completely disable - * dynamic code generation. */ - code->gen = NULL; - } + /* The dynamic code generation shouldn't fail in normal + * conditions, but if it fails at some point, it's very + * probable that it will fail again, so we completely disable + * dynamic code generation. */ + code->gen = NULL; + } - ec_code_c_prepare(code->gf, values, count); + ec_code_c_prepare(code->gf, values, count); - if (linear) { - return ec_code_c_linear; - } + if (linear) { + return ec_code_c_linear; + } - return ec_code_c_interleaved; + return ec_code_c_interleaved; } ec_code_func_linear_t @@ -791,17 +795,17 @@ ec_code_func_interleaved_t ec_code_build_interleaved(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count) { - return (ec_code_func_interleaved_t)ec_code_build(code, width, values, - count, _gf_false); + return (ec_code_func_interleaved_t)ec_code_build(code, width, values, count, + _gf_false); } void ec_code_release(ec_code_t *code, ec_code_func_t *func) { - if ((func->linear != ec_code_c_linear) && - (func->interleaved != ec_code_c_interleaved)) { - ec_code_free(ec_code_chunk_from_func(func->linear)); - } + if ((func->linear != ec_code_c_linear) && + (func->interleaved != ec_code_c_interleaved)) { + ec_code_free(ec_code_chunk_from_func(func->linear)); + } } void @@ -1003,7 +1007,8 @@ ec_code_detect(xlator_t *xl, const char *def) if (ec_code_gen_table[select] == NULL) { gf_msg(xl->name, GF_LOG_WARNING, EINVAL, EC_MSG_EXTENSION_UNKNOWN, "CPU extension '%s' is not known. 
Not using any cpu " - "extensions", def); + "extensions", + def); return NULL; } diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c index 90e004336c0..551adfac043 100644 --- a/xlators/cluster/ec/src/ec-combine.c +++ b/xlators/cluster/ec/src/ec-combine.c @@ -30,41 +30,38 @@ typedef struct _ec_dict_info ec_dict_info_t; struct _ec_dict_combine; typedef struct _ec_dict_combine ec_dict_combine_t; -struct _ec_dict_info -{ - dict_t * dict; - int32_t count; +struct _ec_dict_info { + dict_t *dict; + int32_t count; }; -struct _ec_dict_combine -{ - ec_cbk_data_t * cbk; - int32_t which; +struct _ec_dict_combine { + ec_cbk_data_t *cbk; + int32_t which; }; int32_t -ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst, - ec_cbk_data_t *src) +ec_combine_write(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { - int valid = 0; + int valid = 0; - if (!fop || !dst || !src) - return 0; + if (!fop || !dst || !src) + return 0; - switch (fop->id) { + switch (fop->id) { case GF_FOP_REMOVEXATTR: case GF_FOP_FREMOVEXATTR: case GF_FOP_SETXATTR: case GF_FOP_FSETXATTR: - return 1; + return 1; case GF_FOP_SYMLINK: case GF_FOP_LINK: case GF_FOP_CREATE: case GF_FOP_MKNOD: case GF_FOP_MKDIR: - valid = 3; - break; + valid = 3; + break; case GF_FOP_UNLINK: case GF_FOP_RMDIR: case GF_FOP_SETATTR: @@ -75,42 +72,40 @@ ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst, case GF_FOP_FALLOCATE: case GF_FOP_DISCARD: case GF_FOP_ZEROFILL: - valid = 2; - break; + valid = 2; + break; case GF_FOP_RENAME: - valid = 5; - break; + valid = 5; + break; default: - gf_msg_callingfn (fop->xl->name, GF_LOG_WARNING, EINVAL, - EC_MSG_INVALID_FOP, - "Invalid fop %d", fop->id); - return 0; - break; - } + gf_msg_callingfn(fop->xl->name, GF_LOG_WARNING, EINVAL, + EC_MSG_INVALID_FOP, "Invalid fop %d", fop->id); + return 0; + break; + } - if (!ec_iatt_combine(fop, dst->iatt, src->iatt, valid)) { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_IATT_MISMATCH, - "Mismatching iatt in " - "answers of '%s'", gf_fop_list[fop->id]); - return 0; - } - return 1; + if (!ec_iatt_combine(fop, dst->iatt, src->iatt, valid)) { + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH, + "Mismatching iatt in " + "answers of '%s'", + gf_fop_list[fop->id]); + return 0; + } + return 1; } -void ec_iatt_time_merge(int64_t *dst_sec, uint32_t *dst_nsec, - int64_t src_sec, uint32_t src_nsec) +void +ec_iatt_time_merge(int64_t *dst_sec, uint32_t *dst_nsec, int64_t src_sec, + uint32_t src_nsec) { if ((*dst_sec < src_sec) || - ((*dst_sec == src_sec) && (*dst_nsec < src_nsec))) - { + ((*dst_sec == src_sec) && (*dst_nsec < src_nsec))) { *dst_sec = src_sec; *dst_nsec = src_nsec; } } -static -gf_boolean_t +static gf_boolean_t ec_iatt_is_trusted(ec_fop_data_t *fop, struct iatt *iatt) { uint64_t ino; @@ -141,14 +136,14 @@ ec_iatt_is_trusted(ec_fop_data_t *fop, struct iatt *iatt) return _gf_false; } -int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src, - int32_t count) +int32_t +ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src, + int32_t count) { int32_t i; gf_boolean_t failed = _gf_false; - for (i = 0; i < count; i++) - { + for (i = 0; i < count; i++) { /* Check for basic fields. These fields must be equal always, even if * the inode is not locked because in these cases the parent inode * will be locked and differences in these fields require changes in @@ -172,33 +167,30 @@ int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src, * data is returned. 
*/ failed = _gf_true; } else { - gf_msg_debug (fop->xl->name, 0, - "Ignoring iatt differences because inode is not " - "locked"); + gf_msg_debug(fop->xl->name, 0, + "Ignoring iatt differences because inode is not " + "locked"); } } if (failed) { - gf_msg (fop->xl->name, GF_LOG_WARNING, 0, - EC_MSG_IATT_COMBINE_FAIL, - "Failed to combine iatt (inode: %lu-%lu, links: %u-%u, " - "uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, " - "mode: %o-%o)", - dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink, - src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, - dst[i].ia_gid, src[i].ia_gid, dst[i].ia_rdev, - src[i].ia_rdev, dst[i].ia_size, src[i].ia_size, - st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type), - st_mode_from_ia(src[i].ia_prot, dst[i].ia_type)); + gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_IATT_COMBINE_FAIL, + "Failed to combine iatt (inode: %lu-%lu, links: %u-%u, " + "uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, " + "mode: %o-%o)", + dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink, + src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid, + src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev, + dst[i].ia_size, src[i].ia_size, + st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type), + st_mode_from_ia(src[i].ia_prot, dst[i].ia_type)); return 0; } } - while (count-- > 0) - { + while (count-- > 0) { dst[count].ia_blocks += src[count].ia_blocks; - if (dst[count].ia_blksize < src[count].ia_blksize) - { + if (dst[count].ia_blksize < src[count].ia_blksize) { dst[count].ia_blksize = src[count].ia_blksize; } @@ -213,13 +205,12 @@ int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src, return 1; } -void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count, - int32_t answers) +void +ec_iatt_rebuild(ec_t *ec, struct iatt *iatt, int32_t count, int32_t answers) { uint64_t blocks; - while (count-- > 0) - { + while (count-- > 0) { blocks = iatt[count].ia_blocks * ec->fragments + answers - 1; blocks /= answers; iatt[count].ia_blocks = blocks; @@ -227,93 +218,93 @@ void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count, } gf_boolean_t -ec_xattr_match (dict_t *dict, char *key, data_t *value, void *arg) +ec_xattr_match(dict_t *dict, char *key, data_t *value, void *arg) { - if ((fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) || - (strcmp(key, GET_LINK_COUNT) == 0) || - (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) || - (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) || - (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)) { - return _gf_false; - } + if ((fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) || + (strcmp(key, GET_LINK_COUNT) == 0) || + (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) || + (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) || + (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)) { + return _gf_false; + } - return _gf_true; + return _gf_true; } gf_boolean_t -ec_value_ignore (char *key) +ec_value_ignore(char *key) { - if ((strcmp(key, GF_CONTENT_KEY) == 0) || - (strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) || - (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0) || - (strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0) || - (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) || - (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) || - (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) || - (strncmp(key, GF_XATTR_CLRLK_CMD, - SLEN (GF_XATTR_CLRLK_CMD)) == 0) || - (strcmp(key, DHT_IATT_IN_XDATA_KEY) == 0) || - (strncmp(key, EC_QUOTA_PREFIX, SLEN (EC_QUOTA_PREFIX)) == 0) || - (fnmatch(MARKER_XATTR_PREFIX ".*." 
XTIME, key, 0) == 0) || - (fnmatch(GF_XATTR_MARKER_KEY ".*", key, 0) == 0) || - (XATTR_IS_NODE_UUID(key))) { - return _gf_true; - } + if ((strcmp(key, GF_CONTENT_KEY) == 0) || + (strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) || + (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0) || + (strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0) || + (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) || + (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) || + (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) || + (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) || + (strcmp(key, DHT_IATT_IN_XDATA_KEY) == 0) || + (strncmp(key, EC_QUOTA_PREFIX, SLEN(EC_QUOTA_PREFIX)) == 0) || + (fnmatch(MARKER_XATTR_PREFIX ".*." XTIME, key, 0) == 0) || + (fnmatch(GF_XATTR_MARKER_KEY ".*", key, 0) == 0) || + (XATTR_IS_NODE_UUID(key))) { + return _gf_true; + } - return _gf_false; + return _gf_false; } int32_t -ec_dict_compare (dict_t *dict1, dict_t *dict2) +ec_dict_compare(dict_t *dict1, dict_t *dict2) { - if (are_dicts_equal (dict1, dict2, ec_xattr_match, ec_value_ignore)) - return 1; - return 0; + if (are_dicts_equal(dict1, dict2, ec_xattr_match, ec_value_ignore)) + return 1; + return 0; } static uint32_t ec_dict_list(data_t **list, ec_cbk_data_t *cbk, int32_t which, char *key, gf_boolean_t global) { - ec_t *ec = cbk->fop->xl->private; - ec_cbk_data_t *ans = NULL; - dict_t *dict = NULL; - data_t *data; - uint32_t count; - int32_t i; - - for (i = 0; i < ec->nodes; i++) { - /* We initialize the list with EC_MISSING_DATA if we are - * returning a global list or the current subvolume belongs - * to the group of the accepted answer. Note that if some - * subvolume is known to be down before issuing the request, - * we won't have any answer from it, so we set here the - * appropriate default value. */ - if (global || ((cbk->mask & (1ULL << i)) != 0)) { - list[i] = EC_MISSING_DATA; - } else { - list[i] = NULL; - } + ec_t *ec = cbk->fop->xl->private; + ec_cbk_data_t *ans = NULL; + dict_t *dict = NULL; + data_t *data; + uint32_t count; + int32_t i; + + for (i = 0; i < ec->nodes; i++) { + /* We initialize the list with EC_MISSING_DATA if we are + * returning a global list or the current subvolume belongs + * to the group of the accepted answer. Note that if some + * subvolume is known to be down before issuing the request, + * we won't have any answer from it, so we set here the + * appropriate default value. */ + if (global || ((cbk->mask & (1ULL << i)) != 0)) { + list[i] = EC_MISSING_DATA; + } else { + list[i] = NULL; } + } - count = 0; - list_for_each_entry(ans, &cbk->fop->answer_list, answer_list) { - if (global || ((cbk->mask & ans->mask) != 0)) { - dict = (which == EC_COMBINE_XDATA) ? ans->xdata - : ans->dict; - data = dict_get(dict, key); - if (data != NULL) { - list[ans->idx] = data; - count++; - } - } + count = 0; + list_for_each_entry(ans, &cbk->fop->answer_list, answer_list) + { + if (global || ((cbk->mask & ans->mask) != 0)) { + dict = (which == EC_COMBINE_XDATA) ? 
ans->xdata : ans->dict; + data = dict_get(dict, key); + if (data != NULL) { + list[ans->idx] = data; + count++; + } } + } - return count; + return count; } -int32_t ec_concat_prepare(xlator_t *xl, char **str, char **sep, char **post, - const char *fmt, va_list args) +int32_t +ec_concat_prepare(xlator_t *xl, char **str, char **sep, char **post, + const char *fmt, va_list args) { char *tmp; int32_t len; @@ -339,9 +330,8 @@ int32_t ec_concat_prepare(xlator_t *xl, char **str, char **sep, char **post, return 0; out: - gf_msg (xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_INVALID_FORMAT, - "Invalid concat format"); + gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_FORMAT, + "Invalid concat format"); GF_FREE(*str); @@ -449,7 +439,8 @@ out: return err; } -int32_t ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key) +int32_t +ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key) { ec_t *ec = cbk->fop->xl->private; data_t *data[ec->nodes]; @@ -458,7 +449,6 @@ int32_t ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key) int32_t i, len; int32_t err; - ec_dict_list(data, cbk, which, key, _gf_false); lockinfo = dict_new(); @@ -526,11 +516,12 @@ out: return err; } -int32_t ec_dict_data_uuid(ec_cbk_data_t * cbk, int32_t which, char * key) +int32_t +ec_dict_data_uuid(ec_cbk_data_t *cbk, int32_t which, char *key) { - ec_cbk_data_t * ans, * min; - dict_t * src, * dst; - data_t * data; + ec_cbk_data_t *ans, *min; + dict_t *src, *dst; + data_t *data; min = cbk; for (ans = cbk->next; ans != NULL; ans = ans->next) { @@ -555,7 +546,8 @@ int32_t ec_dict_data_uuid(ec_cbk_data_t * cbk, int32_t which, char * key) return 0; } -int32_t ec_dict_data_iatt(ec_cbk_data_t *cbk, int32_t which, char *key) +int32_t +ec_dict_data_iatt(ec_cbk_data_t *cbk, int32_t which, char *key) { ec_t *ec = cbk->fop->xl->private; data_t *data[ec->nodes]; @@ -578,12 +570,12 @@ int32_t ec_dict_data_iatt(ec_cbk_data_t *cbk, int32_t which, char *key) if (stbuf == NULL) { stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); if (stbuf == NULL) { - ret = -ENOMEM; - goto out; + ret = -ENOMEM; + goto out; } *stbuf = *tmp; } else { - if (!ec_iatt_combine (cbk->fop, stbuf, tmp, 1)) { + if (!ec_iatt_combine(cbk->fop, stbuf, tmp, 1)) { ret = -EINVAL; goto out; } @@ -613,7 +605,8 @@ out: return ret; } -int32_t ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key) +int32_t +ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key) { ec_t *ec = cbk->fop->xl->private; data_t *data[ec->nodes]; @@ -639,7 +632,8 @@ int32_t ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key) return dict_set_uint32(dict, key, max); } -int32_t ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key) +int32_t +ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key) { ec_t *ec = cbk->fop->xl->private; data_t *data[ec->nodes]; @@ -665,14 +659,19 @@ int32_t ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key) return dict_set_uint64(dict, key, max); } -int32_t ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key) +int32_t +ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key) { - ec_t *ec = cbk->fop->xl->private; - data_t *data[ec->nodes]; - dict_t *dict = NULL; - int32_t i = 0; - quota_meta_t size = {0, }; - quota_meta_t max_size = {0, }; + ec_t *ec = cbk->fop->xl->private; + data_t *data[ec->nodes]; + dict_t *dict = NULL; + int32_t i = 0; + quota_meta_t size = { + 0, + }; + quota_meta_t max_size = { + 0, + }; if (ec_dict_list(data, cbk, which, key, _gf_false) == 
0) { return 0; @@ -685,25 +684,26 @@ int32_t ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key) */ for (i = 0; i < ec->nodes; i++) { if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA) || - (quota_data_to_meta (data[i], QUOTA_SIZE_KEY, &size) < 0)) { - continue; + (quota_data_to_meta(data[i], QUOTA_SIZE_KEY, &size) < 0)) { + continue; } if (size.size > max_size.size) - max_size.size = size.size; + max_size.size = size.size; if (size.file_count > max_size.file_count) - max_size.file_count = size.file_count; + max_size.file_count = size.file_count; if (size.dir_count > max_size.dir_count) - max_size.dir_count = size.dir_count; + max_size.dir_count = size.dir_count; } max_size.size *= ec->fragments; dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict; - return quota_dict_set_meta (dict, key, &max_size, IA_IFDIR); + return quota_dict_set_meta(dict, key, &max_size, IA_IFDIR); } -int32_t ec_dict_data_stime(ec_cbk_data_t * cbk, int32_t which, char * key) +int32_t +ec_dict_data_stime(ec_cbk_data_t *cbk, int32_t which, char *key) { ec_t *ec = cbk->fop->xl->private; data_t *data[ec->nodes]; @@ -719,8 +719,8 @@ int32_t ec_dict_data_stime(ec_cbk_data_t * cbk, int32_t which, char * key) } err = gf_get_max_stime(cbk->fop->xl, dict, key, data[i]); if (err != 0) { - gf_msg (cbk->fop->xl->name, GF_LOG_ERROR, -err, - EC_MSG_STIME_COMBINE_FAIL, "STIME combination failed"); + gf_msg(cbk->fop->xl->name, GF_LOG_ERROR, -err, + EC_MSG_STIME_COMBINE_FAIL, "STIME combination failed"); return err; } @@ -729,28 +729,24 @@ int32_t ec_dict_data_stime(ec_cbk_data_t * cbk, int32_t which, char * key) return 0; } -int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value, - void * arg) +int32_t +ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg) { - ec_dict_combine_t * data = arg; + ec_dict_combine_t *data = arg; if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) || - (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) - { - return ec_dict_data_concat("( { })", data->cbk, data->which, - key, NULL, NULL, _gf_false, + (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) { + return ec_dict_data_concat("( { })", data->cbk, data->which, key, + NULL, NULL, _gf_false, data->cbk->fop->xl->name); } - if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN (GF_XATTR_CLRLK_CMD)) == 0) - { + if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) { return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL, NULL, _gf_false); } - if (strncmp(key, GF_XATTR_LOCKINFO_KEY, - SLEN (GF_XATTR_LOCKINFO_KEY)) == 0) - { + if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) { return ec_dict_data_merge(data->cbk, data->which, key); } @@ -758,8 +754,7 @@ int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value, return ec_dict_data_max32(data->cbk, data->which, key); } - if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) - { + if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) { return ec_dict_data_max32(data->cbk, data->which, key); } if ((strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) || @@ -771,23 +766,22 @@ int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value, return ec_dict_data_quota(data->cbk, data->which, key); } /* Ignore all other quota attributes */ - if (strncmp(key, EC_QUOTA_PREFIX, SLEN (EC_QUOTA_PREFIX)) == 0) { + if (strncmp(key, EC_QUOTA_PREFIX, SLEN(EC_QUOTA_PREFIX)) == 0) { return 0; } if (XATTR_IS_NODE_UUID(key)) { if (data->cbk->fop->int32) { - /* List of node uuid is requested */ - return ec_dict_data_concat("{ }", data->cbk, 
data->which, key, - GF_XATTR_LIST_NODE_UUIDS_KEY, - UUID0_STR, _gf_true); + /* List of node uuid is requested */ + return ec_dict_data_concat("{ }", data->cbk, data->which, key, + GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR, + _gf_true); } else { - return ec_dict_data_uuid(data->cbk, data->which, key); + return ec_dict_data_uuid(data->cbk, data->which, key); } } - if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) - { + if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) { return ec_dict_data_stime(data->cbk, data->which, key); } @@ -795,14 +789,15 @@ int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value, return ec_dict_data_max64(data->cbk, data->which, key); } - if (strcmp (key, GF_PRESTAT) == 0 || strcmp (key, GF_POSTSTAT) == 0) { + if (strcmp(key, GF_PRESTAT) == 0 || strcmp(key, GF_POSTSTAT) == 0) { return ec_dict_data_iatt(data->cbk, data->which, key); } return 0; } -int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which) +int32_t +ec_dict_combine(ec_cbk_data_t *cbk, int32_t which) { dict_t *dict = NULL; ec_dict_combine_t data; @@ -815,9 +810,8 @@ int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which) if (dict != NULL) { err = dict_foreach(dict, ec_dict_data_combine, &data); if (err != 0) { - gf_msg (cbk->fop->xl->name, GF_LOG_ERROR, -err, - EC_MSG_DICT_COMBINE_FAIL, - "Dictionary combination failed"); + gf_msg(cbk->fop->xl->name, GF_LOG_ERROR, -err, + EC_MSG_DICT_COMBINE_FAIL, "Dictionary combination failed"); return err; } @@ -826,47 +820,43 @@ int32_t ec_dict_combine(ec_cbk_data_t * cbk, int32_t which) return 0; } -int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count, - struct iovec * src_vector, int32_t src_count) +int32_t +ec_vector_compare(struct iovec *dst_vector, int32_t dst_count, + struct iovec *src_vector, int32_t src_count) { int32_t dst_size = 0, src_size = 0; - if (dst_count > 0) - { + if (dst_count > 0) { dst_size = iov_length(dst_vector, dst_count); } - if (src_count > 0) - { + if (src_count > 0) { src_size = iov_length(src_vector, src_count); } return (dst_size == src_size); } -int32_t ec_flock_compare(struct gf_flock * dst, struct gf_flock * src) +int32_t +ec_flock_compare(struct gf_flock *dst, struct gf_flock *src) { - if ((dst->l_type != src->l_type) || - (dst->l_whence != src->l_whence) || - (dst->l_start != src->l_start) || - (dst->l_len != src->l_len) || + if ((dst->l_type != src->l_type) || (dst->l_whence != src->l_whence) || + (dst->l_start != src->l_start) || (dst->l_len != src->l_len) || (dst->l_pid != src->l_pid) || - !is_same_lkowner(&dst->l_owner, &src->l_owner)) - { + !is_same_lkowner(&dst->l_owner, &src->l_owner)) { return 0; } return 1; } -void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src) +void +ec_statvfs_combine(struct statvfs *dst, struct statvfs *src) { - if (dst->f_bsize < src->f_bsize) - { + if (dst->f_bsize < src->f_bsize) { dst->f_bsize = src->f_bsize; } - if (dst->f_frsize < src->f_frsize) - { + if (dst->f_frsize < src->f_frsize) { dst->f_blocks *= dst->f_frsize; dst->f_blocks /= src->f_frsize; @@ -877,9 +867,7 @@ void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src) dst->f_bavail /= src->f_frsize; dst->f_frsize = src->f_frsize; - } - else if (dst->f_frsize > src->f_frsize) - { + } else if (dst->f_frsize > src->f_frsize) { src->f_blocks *= src->f_frsize; src->f_blocks /= dst->f_frsize; @@ -889,90 +877,80 @@ void ec_statvfs_combine(struct statvfs * dst, struct statvfs * src) src->f_bavail *= src->f_frsize; src->f_bavail /= dst->f_frsize; } - if 
(dst->f_blocks > src->f_blocks) - { + if (dst->f_blocks > src->f_blocks) { dst->f_blocks = src->f_blocks; } - if (dst->f_bfree > src->f_bfree) - { + if (dst->f_bfree > src->f_bfree) { dst->f_bfree = src->f_bfree; } - if (dst->f_bavail > src->f_bavail) - { + if (dst->f_bavail > src->f_bavail) { dst->f_bavail = src->f_bavail; } - if (dst->f_files < src->f_files) - { + if (dst->f_files < src->f_files) { dst->f_files = src->f_files; } - if (dst->f_ffree > src->f_ffree) - { + if (dst->f_ffree > src->f_ffree) { dst->f_ffree = src->f_ffree; } - if (dst->f_favail > src->f_favail) - { + if (dst->f_favail > src->f_favail) { dst->f_favail = src->f_favail; } - if (dst->f_namemax > src->f_namemax) - { + if (dst->f_namemax > src->f_namemax) { dst->f_namemax = src->f_namemax; } - if (dst->f_flag != src->f_flag) - { - gf_msg_debug (THIS->name, 0, - "Mismatching file system flags " - "(%lX, %lX)", - dst->f_flag, src->f_flag); + if (dst->f_flag != src->f_flag) { + gf_msg_debug(THIS->name, 0, + "Mismatching file system flags " + "(%lX, %lX)", + dst->f_flag, src->f_flag); } dst->f_flag &= src->f_flag; } -int32_t ec_combine_check(ec_cbk_data_t * dst, ec_cbk_data_t * src, - ec_combine_f combine) +int32_t +ec_combine_check(ec_cbk_data_t *dst, ec_cbk_data_t *src, ec_combine_f combine) { - ec_fop_data_t * fop = dst->fop; + ec_fop_data_t *fop = dst->fop; - if (dst->op_ret != src->op_ret) - { - gf_msg_debug (fop->xl->name, 0, "Mismatching return code in " - "answers of '%s': %d <-> %d", - ec_fop_name(fop->id), dst->op_ret, src->op_ret); + if (dst->op_ret != src->op_ret) { + gf_msg_debug(fop->xl->name, 0, + "Mismatching return code in " + "answers of '%s': %d <-> %d", + ec_fop_name(fop->id), dst->op_ret, src->op_ret); return 0; } - if (dst->op_ret < 0) - { - if (dst->op_errno != src->op_errno) - { - gf_msg_debug (fop->xl->name, 0, "Mismatching errno code in " - "answers of '%s': %d <-> %d", - ec_fop_name(fop->id), dst->op_errno, src->op_errno); + if (dst->op_ret < 0) { + if (dst->op_errno != src->op_errno) { + gf_msg_debug(fop->xl->name, 0, + "Mismatching errno code in " + "answers of '%s': %d <-> %d", + ec_fop_name(fop->id), dst->op_errno, src->op_errno); return 0; } } - if (!ec_dict_compare(dst->xdata, src->xdata)) - { - gf_msg (fop->xl->name, GF_LOG_DEBUG, 0, - EC_MSG_XDATA_MISMATCH, - "Mismatching xdata in answers " - "of '%s'", ec_fop_name(fop->id)); + if (!ec_dict_compare(dst->xdata, src->xdata)) { + gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_XDATA_MISMATCH, + "Mismatching xdata in answers " + "of '%s'", + ec_fop_name(fop->id)); return 0; } - if ((dst->op_ret >= 0) && (combine != NULL)) - { + if ((dst->op_ret >= 0) && (combine != NULL)) { return combine(fop, dst, src); } return 1; } -void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine) +void +ec_combine(ec_cbk_data_t *newcbk, ec_combine_f combine) { ec_fop_data_t *fop = newcbk->fop; ec_cbk_data_t *cbk = NULL, *tmp = NULL; @@ -987,17 +965,14 @@ void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine) item = fop->cbk_list.prev; list_for_each_entry(cbk, &fop->cbk_list, list) { - if (ec_combine_check(newcbk, cbk, combine)) - { + if (ec_combine_check(newcbk, cbk, combine)) { newcbk->count += cbk->count; newcbk->mask |= cbk->mask; item = cbk->list.prev; - while (item != &fop->cbk_list) - { + while (item != &fop->cbk_list) { tmp = list_entry(item, ec_cbk_data_t, list); - if (tmp->count >= newcbk->count) - { + if (tmp->count >= newcbk->count) { break; } item = item->prev; diff --git a/xlators/cluster/ec/src/ec-common.c 
b/xlators/cluster/ec/src/ec-common.c index 86432bd7da7..0eee0a3363f 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -24,29 +24,28 @@ #define EC_INVALID_INDEX UINT32_MAX void -ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx, - int32_t ret_status) +ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status) { - ec_fd_t *fd_ctx; + ec_fd_t *fd_ctx; - if (fd == NULL) - return; + if (fd == NULL) + return; - LOCK (&fd->lock); - { - fd_ctx = __ec_fd_get(fd, xl); - if (fd_ctx) { - if (ret_status >= 0) - fd_ctx->fd_status[idx] = EC_FD_OPENED; - else - fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED; - } + LOCK(&fd->lock); + { + fd_ctx = __ec_fd_get(fd, xl); + if (fd_ctx) { + if (ret_status >= 0) + fd_ctx->fd_status[idx] = EC_FD_OPENED; + else + fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED; } - UNLOCK (&fd->lock); + } + UNLOCK(&fd->lock); } static int -ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open) +ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open) { int i = 0; int count = 0; @@ -56,22 +55,22 @@ ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open) ec = this->private; *need_open = 0; - fd_ctx = ec_fd_get (fd, this); + fd_ctx = ec_fd_get(fd, this); if (!fd_ctx) return count; - LOCK (&fd->lock); + LOCK(&fd->lock); { for (i = 0; i < ec->nodes; i++) { - if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) && - (ec->xl_up & (1<<i))) { - fd_ctx->fd_status[i] = EC_FD_OPENING; - *need_open |= (1<<i); - count++; - } + if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) && + (ec->xl_up & (1 << i))) { + fd_ctx->fd_status[i] = EC_FD_OPENING; + *need_open |= (1 << i); + count++; + } } } - UNLOCK (&fd->lock); + UNLOCK(&fd->lock); /* If fd needs to open on minimum number of nodes * then ignore fixing the fd as it has been @@ -84,136 +83,137 @@ ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open) } static gf_boolean_t -ec_is_fd_fixable (fd_t *fd) +ec_is_fd_fixable(fd_t *fd) { if (!fd || !fd->inode) return _gf_false; - else if (fd_is_anonymous (fd)) + else if (fd_is_anonymous(fd)) return _gf_false; - else if (gf_uuid_is_null (fd->inode->gfid)) + else if (gf_uuid_is_null(fd->inode->gfid)) return _gf_false; return _gf_true; } static void -ec_fix_open (ec_fop_data_t *fop) +ec_fix_open(ec_fop_data_t *fop) { - int call_count = 0; - uintptr_t need_open = 0; - int ret = 0; - loc_t loc = {0, }; + int call_count = 0; + uintptr_t need_open = 0; + int ret = 0; + loc_t loc = { + 0, + }; - if (!ec_is_fd_fixable (fop->fd)) + if (!ec_is_fd_fixable(fop->fd)) goto out; /* Evaluate how many remote fd's to be opened */ - call_count = ec_fd_ctx_need_open (fop->fd, fop->xl, &need_open); + call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open); if (!call_count) goto out; - loc.inode = inode_ref (fop->fd->inode); - gf_uuid_copy (loc.gfid, fop->fd->inode->gfid); - ret = loc_path (&loc, NULL); + loc.inode = inode_ref(fop->fd->inode); + gf_uuid_copy(loc.gfid, fop->fd->inode->gfid); + ret = loc_path(&loc, NULL); if (ret < 0) { goto out; } if (IA_IFDIR == fop->fd->inode->ia_type) { - ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, - NULL, NULL, &fop->loc[0], fop->fd, NULL); - } else{ - ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, - NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL); + ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL, + &fop->loc[0], fop->fd, NULL); + } else { + ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL, + &loc, fop->fd->flags, fop->fd, NULL); } out: - loc_wipe (&loc); + loc_wipe(&loc); } off_t 
-ec_range_end_get (off_t fl_start, size_t fl_size) +ec_range_end_get(off_t fl_start, size_t fl_size) { - off_t fl_end = 0; - switch (fl_size) { + off_t fl_end = 0; + switch (fl_size) { case 0: - return fl_start; + return fl_start; case LLONG_MAX: /*Infinity*/ - return LLONG_MAX; + return LLONG_MAX; default: - fl_end = fl_start + fl_size - 1; - if (fl_end < 0) /*over-flow*/ - return LLONG_MAX; - else - return fl_end; - } + fl_end = fl_start + fl_size - 1; + if (fl_end < 0) /*over-flow*/ + return LLONG_MAX; + else + return fl_end; + } } static gf_boolean_t -ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2) +ec_is_range_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2) { - return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start)); + return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start)); } static gf_boolean_t -ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2) +ec_lock_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2) { - ec_t *ec = l1->fop->xl->private; + ec_t *ec = l1->fop->xl->private; - /* Fops like access/stat won't have to worry what the other fops are - * modifying as the fop is wound only to one brick. So it can be - * executed in parallel*/ - if (l1->fop->minimum == EC_MINIMUM_ONE || - l2->fop->minimum == EC_MINIMUM_ONE) - return _gf_false; + /* Fops like access/stat won't have to worry what the other fops are + * modifying as the fop is wound only to one brick. So it can be + * executed in parallel*/ + if (l1->fop->minimum == EC_MINIMUM_ONE || + l2->fop->minimum == EC_MINIMUM_ONE) + return _gf_false; - if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) && - (l2->fop->flags & EC_FLAG_LOCK_SHARED)) - return _gf_false; + if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) && + (l2->fop->flags & EC_FLAG_LOCK_SHARED)) + return _gf_false; - if (!ec->parallel_writes) { - return _gf_true; - } + if (!ec->parallel_writes) { + return _gf_true; + } - return ec_is_range_conflict (l1, l2); + return ec_is_range_conflict(l1, l2); } uint32_t -ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop) -{ - if (ec->read_policy == EC_ROUND_ROBIN) { - return ec->idx; - } else if (ec->read_policy == EC_GFID_HASH) { - if (fop->use_fd) { - return SuperFastHash((char *)fop->fd->inode->gfid, - sizeof(fop->fd->inode->gfid)) % ec->nodes; - } else { - if (gf_uuid_is_null (fop->loc[0].gfid)) - loc_gfid (&fop->loc[0], fop->loc[0].gfid); - return SuperFastHash((char *)fop->loc[0].gfid, - sizeof(fop->loc[0].gfid)) % ec->nodes; - } +ec_select_first_by_read_policy(ec_t *ec, ec_fop_data_t *fop) +{ + if (ec->read_policy == EC_ROUND_ROBIN) { + return ec->idx; + } else if (ec->read_policy == EC_GFID_HASH) { + if (fop->use_fd) { + return SuperFastHash((char *)fop->fd->inode->gfid, + sizeof(fop->fd->inode->gfid)) % + ec->nodes; + } else { + if (gf_uuid_is_null(fop->loc[0].gfid)) + loc_gfid(&fop->loc[0], fop->loc[0].gfid); + return SuperFastHash((char *)fop->loc[0].gfid, + sizeof(fop->loc[0].gfid)) % + ec->nodes; } - return 0; + } + return 0; } -static -gf_boolean_t ec_child_valid(ec_t * ec, ec_fop_data_t * fop, uint32_t idx) +static gf_boolean_t +ec_child_valid(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) { return (idx < ec->nodes) && (((fop->remaining >> idx) & 1) == 1); } -static -uint32_t ec_child_next(ec_t * ec, ec_fop_data_t * fop, uint32_t idx) +static uint32_t +ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) { - while (!ec_child_valid(ec, fop, idx)) - { - if (++idx >= ec->nodes) - { + while (!ec_child_valid(ec, fop, idx)) { + if (++idx >= ec->nodes) { idx = 0; } - if (idx == 
fop->first) - { + if (idx == fop->first) { return EC_INVALID_INDEX; } } @@ -221,20 +221,21 @@ uint32_t ec_child_next(ec_t * ec, ec_fop_data_t * fop, uint32_t idx) return idx; } -int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, uintptr_t mask, - uintptr_t good, uintptr_t bad, dict_t * xdata) +int32_t +ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, + uintptr_t bad, dict_t *xdata) { if (op_ret < 0) { - gf_msg (this->name, GF_LOG_DEBUG, op_errno, - EC_MSG_HEAL_FAIL, "Heal failed"); + gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL, + "Heal failed"); } else { if ((mask & ~good) != 0) { - gf_msg (this->name, GF_LOG_DEBUG, 0, - EC_MSG_HEAL_SUCCESS, "Heal succeeded on %d/%d " - "subvolumes", - gf_bits_count(mask & ~(good | bad)), - gf_bits_count(mask & ~good)); + gf_msg(this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_SUCCESS, + "Heal succeeded on %d/%d " + "subvolumes", + gf_bits_count(mask & ~(good | bad)), + gf_bits_count(mask & ~good)); } } @@ -242,103 +243,101 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this, } static uintptr_t -ec_fop_needs_name_heal (ec_fop_data_t *fop) +ec_fop_needs_name_heal(ec_fop_data_t *fop) { - ec_t *ec = NULL; - ec_cbk_data_t *cbk = NULL; - ec_cbk_data_t *enoent_cbk = NULL; + ec_t *ec = NULL; + ec_cbk_data_t *cbk = NULL; + ec_cbk_data_t *enoent_cbk = NULL; - ec = fop->xl->private; - if (fop->id != GF_FOP_LOOKUP) - return 0; + ec = fop->xl->private; + if (fop->id != GF_FOP_LOOKUP) + return 0; - if (!fop->loc[0].name || strlen (fop->loc[0].name) == 0) - return 0; + if (!fop->loc[0].name || strlen(fop->loc[0].name) == 0) + return 0; - list_for_each_entry(cbk, &fop->cbk_list, list) - { - if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) { - enoent_cbk = cbk; - break; - } + list_for_each_entry(cbk, &fop->cbk_list, list) + { + if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) { + enoent_cbk = cbk; + break; } + } - if (!enoent_cbk) - return 0; + if (!enoent_cbk) + return 0; - return ec->xl_up & ~enoent_cbk->mask; + return ec->xl_up & ~enoent_cbk->mask; } -int32_t ec_fop_needs_heal(ec_fop_data_t *fop) +int32_t +ec_fop_needs_heal(ec_fop_data_t *fop) { ec_t *ec = fop->xl->private; if (fop->lock_count == 0) { - /* - * if fop->lock_count is zero that means it saw version mismatch - * without any locks so it can't be trusted. If we launch a heal - * based on this it will lead to INODELKs which will affect I/O - * performance. Considering self-heal-daemon and operations on - * the inode from client which take locks can still trigger the - * heal we can choose to not attempt a heal when fop->lock_count - * is zero. - */ - return 0; + /* + * if fop->lock_count is zero that means it saw version mismatch + * without any locks so it can't be trusted. If we launch a heal + * based on this it will lead to INODELKs which will affect I/O + * performance. Considering self-heal-daemon and operations on + * the inode from client which take locks can still trigger the + * heal we can choose to not attempt a heal when fop->lock_count + * is zero. 
+ */ + return 0; } return (ec->xl_up & ~(fop->remaining | fop->good)) != 0; } -void ec_check_status(ec_fop_data_t * fop) +void +ec_check_status(ec_fop_data_t *fop) { - ec_t * ec = fop->xl->private; + ec_t *ec = fop->xl->private; int32_t partial = 0; char str1[32], str2[32], str3[32], str4[32], str5[32]; - if (!ec_fop_needs_name_heal (fop) && !ec_fop_needs_heal(fop)) { + if (!ec_fop_needs_name_heal(fop) && !ec_fop_needs_heal(fop)) { return; } if (fop->answer && fop->answer->op_ret >= 0) { - if ((fop->id == GF_FOP_LOOKUP) || - (fop->id == GF_FOP_STAT) || (fop->id == GF_FOP_FSTAT)) { + if ((fop->id == GF_FOP_LOOKUP) || (fop->id == GF_FOP_STAT) || + (fop->id == GF_FOP_FSTAT)) { partial = fop->answer->iatt[0].ia_type == IA_IFDIR; } else if (fop->id == GF_FOP_OPENDIR) { partial = 1; } } - gf_msg (fop->xl->name, GF_LOG_WARNING, 0, - EC_MSG_OP_FAIL_ON_SUBVOLS, - "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " - "remaining=%s, good=%s, bad=%s)", - gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, - ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), - ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), - ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), - ec_bin(str4, sizeof(str4), fop->good, ec->nodes), - ec_bin(str5, sizeof(str5), - ec->xl_up & ~(fop->remaining | fop->good), ec->nodes)); - if (fop->use_fd) - { + gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, + "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " + "remaining=%s, good=%s, bad=%s)", + gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), + ec_bin(str4, sizeof(str4), fop->good, ec->nodes), + ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), + ec->nodes)); + if (fop->use_fd) { if (fop->fd != NULL) { ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, fop->fd, partial, NULL); } - } - else - { + } else { ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, &fop->loc[0], partial, NULL); - if (fop->loc[1].inode != NULL) - { + if (fop->loc[1].inode != NULL) { ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, &fop->loc[1], partial, NULL); } } } -void ec_update_good(ec_fop_data_t *fop, uintptr_t good) +void +ec_update_good(ec_fop_data_t *fop, uintptr_t good) { fop->good = good; @@ -349,7 +348,8 @@ void ec_update_good(ec_fop_data_t *fop, uintptr_t good) } } -void ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop) +void +ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop) { /* Fops that are executed only on one brick do not have enough information * to update the global mask of good bricks. 
*/ @@ -365,15 +365,16 @@ void ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop) lock->good_mask &= fop->good | fop->remaining; } -void __ec_fop_set_error(ec_fop_data_t * fop, int32_t error) +void +__ec_fop_set_error(ec_fop_data_t *fop, int32_t error) { - if ((error != 0) && (fop->error == 0)) - { + if ((error != 0) && (fop->error == 0)) { fop->error = error; } } -void ec_fop_set_error(ec_fop_data_t * fop, int32_t error) +void +ec_fop_set_error(ec_fop_data_t *fop, int32_t error) { LOCK(&fop->lock); @@ -425,18 +426,20 @@ ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro) return cbk; } -void ec_sleep(ec_fop_data_t *fop) +void +ec_sleep(ec_fop_data_t *fop) { LOCK(&fop->lock); - GF_ASSERT (fop->refs > 0); + GF_ASSERT(fop->refs > 0); fop->refs++; fop->jobs++; UNLOCK(&fop->lock); } -int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume) +int32_t +ec_check_complete(ec_fop_data_t *fop, ec_resume_f resume) { int32_t error = -1; @@ -444,14 +447,11 @@ int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume) GF_ASSERT(fop->resume == NULL); - if (--fop->jobs != 0) - { + if (--fop->jobs != 0) { ec_trace("WAIT", fop, "resume=%p", resume); fop->resume = resume; - } - else - { + } else { error = fop->error; fop->error = 0; } @@ -461,7 +461,8 @@ int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume) return error; } -void ec_resume(ec_fop_data_t * fop, int32_t error) +void +ec_resume(ec_fop_data_t *fop, int32_t error) { ec_resume_f resume = NULL; @@ -469,16 +470,13 @@ void ec_resume(ec_fop_data_t * fop, int32_t error) __ec_fop_set_error(fop, error); - if (--fop->jobs == 0) - { + if (--fop->jobs == 0) { resume = fop->resume; fop->resume = NULL; - if (resume != NULL) - { + if (resume != NULL) { ec_trace("RESUME", fop, "error=%d", error); - if (fop->error != 0) - { + if (fop->error != 0) { error = fop->error; } fop->error = 0; @@ -487,21 +485,20 @@ void ec_resume(ec_fop_data_t * fop, int32_t error) UNLOCK(&fop->lock); - if (resume != NULL) - { + if (resume != NULL) { resume(fop, error); } ec_fop_data_release(fop); } -void ec_resume_parent(ec_fop_data_t * fop, int32_t error) +void +ec_resume_parent(ec_fop_data_t *fop, int32_t error) { - ec_fop_data_t * parent; + ec_fop_data_t *parent; parent = fop->parent; - if (parent != NULL) - { + if (parent != NULL) { ec_trace("RESUME_PARENT", fop, "error=%u", error); fop->parent = NULL; ec_resume(parent, error); @@ -509,22 +506,23 @@ void ec_resume_parent(ec_fop_data_t * fop, int32_t error) } gf_boolean_t -ec_is_recoverable_error (int32_t op_errno) +ec_is_recoverable_error(int32_t op_errno) { - switch (op_errno) { + switch (op_errno) { case ENOTCONN: case ESTALE: case ENOENT: - case EBADFD:/*Opened fd but brick is disconnected*/ - case EIO:/*Backend-fs crash like XFS/ext4 etc*/ - return _gf_true; - } - return _gf_false; + case EBADFD: /*Opened fd but brick is disconnected*/ + case EIO: /*Backend-fs crash like XFS/ext4 etc*/ + return _gf_true; + } + return _gf_false; } -void ec_complete(ec_fop_data_t * fop) +void +ec_complete(ec_fop_data_t *fop) { - ec_cbk_data_t * cbk = NULL; + ec_cbk_data_t *cbk = NULL; int32_t resume = 0, update = 0; int healing_count = 0; @@ -536,9 +534,9 @@ void ec_complete(ec_fop_data_t * fop) if (fop->answer == NULL) { if (!list_empty(&fop->cbk_list)) { cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list); - healing_count = gf_bits_count (cbk->mask & fop->healing); - /* fop shouldn't be treated as success if it is not - * successful on at least fop->minimum good copies*/ + healing_count = 
gf_bits_count(cbk->mask & fop->healing); + /* fop shouldn't be treated as success if it is not + * successful on at least fop->minimum good copies*/ if ((cbk->count - healing_count) >= fop->minimum) { fop->answer = cbk; @@ -560,8 +558,7 @@ void ec_complete(ec_fop_data_t * fop) ec_update_good(fop, cbk->mask); } - if (resume) - { + if (resume) { ec_resume(fop, 0); } @@ -571,40 +568,39 @@ void ec_complete(ec_fop_data_t * fop) /* There could be already granted locks sitting on the bricks, unlock for which * must be wound at all costs*/ static gf_boolean_t -ec_must_wind (ec_fop_data_t *fop) -{ - if ((fop->id == GF_FOP_INODELK) || (fop->id == GF_FOP_FINODELK) || - (fop->id == GF_FOP_LK)) { - if (fop->flock.l_type == F_UNLCK) - return _gf_true; - } else if ((fop->id == GF_FOP_ENTRYLK) || - (fop->id == GF_FOP_FENTRYLK)) { - if (fop->entrylk_cmd == ENTRYLK_UNLOCK) - return _gf_true; - } +ec_must_wind(ec_fop_data_t *fop) +{ + if ((fop->id == GF_FOP_INODELK) || (fop->id == GF_FOP_FINODELK) || + (fop->id == GF_FOP_LK)) { + if (fop->flock.l_type == F_UNLCK) + return _gf_true; + } else if ((fop->id == GF_FOP_ENTRYLK) || (fop->id == GF_FOP_FENTRYLK)) { + if (fop->entrylk_cmd == ENTRYLK_UNLOCK) + return _gf_true; + } - return _gf_false; + return _gf_false; } static gf_boolean_t -ec_internal_op (ec_fop_data_t *fop) -{ - if (ec_must_wind (fop)) - return _gf_true; - if (fop->id == GF_FOP_XATTROP) - return _gf_true; - if (fop->id == GF_FOP_FXATTROP) - return _gf_true; - return _gf_false; +ec_internal_op(ec_fop_data_t *fop) +{ + if (ec_must_wind(fop)) + return _gf_true; + if (fop->id == GF_FOP_XATTROP) + return _gf_true; + if (fop->id == GF_FOP_FXATTROP) + return _gf_true; + return _gf_false; } char * -ec_msg_str (ec_fop_data_t *fop) +ec_msg_str(ec_fop_data_t *fop) { - loc_t *loc1 = NULL; - loc_t *loc2 = NULL; - char gfid1[64] = {0}; - char gfid2[64] = {0}; + loc_t *loc1 = NULL; + loc_t *loc2 = NULL; + char gfid1[64] = {0}; + char gfid2[64] = {0}; if (fop->errstr) return fop->errstr; @@ -614,29 +610,29 @@ ec_msg_str (ec_fop_data_t *fop) loc2 = &fop->loc[1]; if (fop->id == GF_FOP_RENAME) { - gf_asprintf(&fop->errstr, - "FOP : '%s' failed on '%s' and '%s' with gfids " - "%s and %s respectively", ec_fop_name (fop->id), - loc1->path, loc2->path, - uuid_utoa_r (loc1->gfid, gfid1), - uuid_utoa_r (loc2->gfid, gfid2)); + gf_asprintf(&fop->errstr, + "FOP : '%s' failed on '%s' and '%s' with gfids " + "%s and %s respectively", + ec_fop_name(fop->id), loc1->path, loc2->path, + uuid_utoa_r(loc1->gfid, gfid1), + uuid_utoa_r(loc2->gfid, gfid2)); } else { - gf_asprintf(&fop->errstr, - "FOP : '%s' failed on '%s' with gfid %s", - ec_fop_name (fop->id), - loc1->path, uuid_utoa_r (loc1->gfid, gfid1)); + gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s", + ec_fop_name(fop->id), loc1->path, + uuid_utoa_r(loc1->gfid, gfid1)); } } else { gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s", - ec_fop_name (fop->id), - uuid_utoa_r (fop->fd->inode->gfid, gfid1)); + ec_fop_name(fop->id), + uuid_utoa_r(fop->fd->inode->gfid, gfid1)); } return fop->errstr; } -int32_t ec_child_select(ec_fop_data_t * fop) +int32_t +ec_child_select(ec_fop_data_t *fop) { - ec_t * ec = fop->xl->private; + ec_t *ec = fop->xl->private; int32_t first = 0, num = 0; ec_fop_cleanup(fop); @@ -645,26 +641,22 @@ int32_t ec_child_select(ec_fop_data_t * fop) /* Wind the fop on same subvols as parent for any internal extra fops like * head/tail read in case of writev fop. 
Unlocks shouldn't do this because * unlock should go on all subvols where lock is performed*/ - if (fop->parent && !ec_internal_op (fop)) { - fop->mask &= (fop->parent->mask & ~fop->parent->healing); + if (fop->parent && !ec_internal_op(fop)) { + fop->mask &= (fop->parent->mask & ~fop->parent->healing); } - if ((fop->mask & ~ec->xl_up) != 0) - { - gf_msg (fop->xl->name, GF_LOG_WARNING, 0, - EC_MSG_OP_EXEC_UNAVAIL, - "Executing operation with " - "some subvolumes unavailable. (%lX). %s ", - fop->mask & ~ec->xl_up, ec_msg_str(fop)); + if ((fop->mask & ~ec->xl_up) != 0) { + gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_EXEC_UNAVAIL, + "Executing operation with " + "some subvolumes unavailable. (%lX). %s ", + fop->mask & ~ec->xl_up, ec_msg_str(fop)); fop->mask &= ec->xl_up; } - switch (fop->minimum) - { + switch (fop->minimum) { case EC_MINIMUM_ALL: fop->minimum = gf_bits_count(fop->mask); - if (fop->minimum >= ec->fragments) - { + if (fop->minimum >= ec->fragments) { break; } case EC_MINIMUM_MIN: @@ -675,11 +667,11 @@ int32_t ec_child_select(ec_fop_data_t * fop) } if (ec->read_policy == EC_ROUND_ROBIN) { - first = ec->idx; - if (++first >= ec->nodes) { - first = 0; - } - ec->idx = first; + first = ec->idx; + if (++first >= ec->nodes) { + first = 0; + } + ec->idx = first; } num = gf_bits_count(fop->mask); @@ -690,14 +682,12 @@ int32_t ec_child_select(ec_fop_data_t * fop) ec_trace("SELECT", fop, ""); - if ((num < fop->minimum) && (num < ec->fragments)) - { - gf_msg (ec->xl->name, GF_LOG_ERROR, 0, - EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). %s", - num, fop->minimum, ec_msg_str(fop)); + if ((num < fop->minimum) && (num < ec->fragments)) { + gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, + "Insufficient available children " + "for this request (have %d, need " + "%d). 
%s", + num, fop->minimum, ec_msg_str(fop)); return 0; } @@ -706,10 +696,11 @@ int32_t ec_child_select(ec_fop_data_t * fop) return 1; } -void ec_dispatch_next(ec_fop_data_t * fop, uint32_t idx) +void +ec_dispatch_next(ec_fop_data_t *fop, uint32_t idx) { uint32_t i = EC_INVALID_INDEX; - ec_t * ec = fop->xl->private; + ec_t *ec = fop->xl->private; LOCK(&fop->lock); @@ -727,15 +718,15 @@ void ec_dispatch_next(ec_fop_data_t * fop, uint32_t idx) UNLOCK(&fop->lock); - if (i < EC_MAX_NODES) - { + if (i < EC_MAX_NODES) { fop->wind(ec, fop, idx); } } -void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask) +void +ec_dispatch_mask(ec_fop_data_t *fop, uintptr_t mask) { - ec_t * ec = fop->xl->private; + ec_t *ec = fop->xl->private; int32_t count, idx; count = gf_bits_count(mask); @@ -752,10 +743,8 @@ void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask) UNLOCK(&fop->lock); idx = 0; - while (mask != 0) - { - if ((mask & 1) != 0) - { + while (mask != 0) { + if ((mask & 1) != 0) { fop->wind(ec, fop, idx); } idx++; @@ -763,27 +752,27 @@ void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask) } } -void ec_dispatch_start(ec_fop_data_t * fop) +void +ec_dispatch_start(ec_fop_data_t *fop) { fop->answer = NULL; fop->good = 0; INIT_LIST_HEAD(&fop->cbk_list); - if (fop->lock_count > 0) - { + if (fop->lock_count > 0) { ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner); } } -void ec_dispatch_one(ec_fop_data_t * fop) +void +ec_dispatch_one(ec_fop_data_t *fop) { ec_dispatch_start(fop); - if (ec_child_select(fop)) - { + if (ec_child_select(fop)) { fop->expected = 1; - fop->first = ec_select_first_by_read_policy (fop->xl->private, fop); + fop->first = ec_select_first_by_read_policy(fop->xl->private, fop); ec_dispatch_next(fop, fop->first); } @@ -799,8 +788,8 @@ ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk) *cbk = tmp; } if ((tmp != NULL) && (tmp->op_ret < 0) && - ec_is_recoverable_error (tmp->op_errno)) { - GF_ASSERT (fop->mask & (1ULL << tmp->idx)); + ec_is_recoverable_error(tmp->op_errno)) { + GF_ASSERT(fop->mask & (1ULL << tmp->idx)); fop->mask ^= (1ULL << tmp->idx); if (fop->mask) { return _gf_true; @@ -810,12 +799,12 @@ ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk) return _gf_false; } -void ec_dispatch_inc(ec_fop_data_t * fop) +void +ec_dispatch_inc(ec_fop_data_t *fop) { ec_dispatch_start(fop); - if (ec_child_select(fop)) - { + if (ec_child_select(fop)) { fop->expected = gf_bits_count(fop->remaining); fop->first = 0; @@ -824,35 +813,34 @@ void ec_dispatch_inc(ec_fop_data_t * fop) } void -ec_dispatch_all (ec_fop_data_t *fop) +ec_dispatch_all(ec_fop_data_t *fop) { - ec_dispatch_start(fop); + ec_dispatch_start(fop); - if (ec_child_select(fop)) { - fop->expected = gf_bits_count(fop->remaining); - fop->first = 0; + if (ec_child_select(fop)) { + fop->expected = gf_bits_count(fop->remaining); + fop->first = 0; - ec_dispatch_mask(fop, fop->remaining); - } + ec_dispatch_mask(fop, fop->remaining); + } } -void ec_dispatch_min(ec_fop_data_t * fop) +void +ec_dispatch_min(ec_fop_data_t *fop) { - ec_t * ec = fop->xl->private; + ec_t *ec = fop->xl->private; uintptr_t mask; uint32_t idx; int32_t count; ec_dispatch_start(fop); - if (ec_child_select(fop)) - { + if (ec_child_select(fop)) { fop->expected = count = ec->fragments; - fop->first = ec_select_first_by_read_policy (fop->xl->private, fop); + fop->first = ec_select_first_by_read_policy(fop->xl->private, fop); idx = fop->first - 1; mask = 0; - while (count-- > 0) - { + while (count-- > 0) { idx = ec_child_next(ec, fop, idx + 
1); if (idx < EC_MAX_NODES) mask |= 1ULL << idx; @@ -862,19 +850,18 @@ void ec_dispatch_min(ec_fop_data_t * fop) } } -ec_lock_t *ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc) +ec_lock_t * +ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc) { ec_t *ec = fop->xl->private; - ec_lock_t * lock; + ec_lock_t *lock; int32_t err; if ((loc->inode == NULL) || - (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid))) - { - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_INVALID_INODE, - "Trying to lock based on an invalid " - "inode"); + (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid))) { + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_INODE, + "Trying to lock based on an invalid " + "inode"); __ec_fop_set_error(fop, EINVAL); @@ -882,8 +869,7 @@ ec_lock_t *ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc) } lock = mem_get0(ec->lock_pool); - if (lock != NULL) - { + if (lock != NULL) { lock->good_mask = -1ULL; INIT_LIST_HEAD(&lock->owners); INIT_LIST_HEAD(&lock->waiting); @@ -900,7 +886,8 @@ ec_lock_t *ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc) return lock; } -void ec_lock_destroy(ec_lock_t * lock) +void +ec_lock_destroy(ec_lock_t *lock) { loc_wipe(&lock->loc); if (lock->fd != NULL) { @@ -910,13 +897,15 @@ void ec_lock_destroy(ec_lock_t * lock) mem_put(lock); } -int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2) +int32_t +ec_lock_compare(ec_lock_t *lock1, ec_lock_t *lock2) { return gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid); } -void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags, - loc_t *base, off_t fl_start, size_t fl_size) +void +ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags, loc_t *base, + off_t fl_start, size_t fl_size) { ec_lock_link_t *link; @@ -951,14 +940,14 @@ void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags, link->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0; link->base = base; link->fl_start = fl_start; - link->fl_end = ec_range_end_get (fl_start, fl_size); + link->fl_end = ec_range_end_get(fl_start, fl_size); lock->refs_pending++; } -void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc, - uint32_t flags, loc_t *base, - off_t fl_start, size_t fl_size) +void +ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc, uint32_t flags, + loc_t *base, off_t fl_start, size_t fl_size) { ec_lock_t *lock = NULL; ec_inode_t *ctx; @@ -987,8 +976,8 @@ void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc, if ((fop->lock_count > 0) && (fop->locks[0].lock == lock)) { /* Combine data/meta updates */ fop->locks[0].update[EC_DATA_TXN] |= (flags & EC_UPDATE_DATA) != 0; - fop->locks[0].update[EC_METADATA_TXN] |= - (flags & EC_UPDATE_META) != 0; + fop->locks[0].update[EC_METADATA_TXN] |= (flags & EC_UPDATE_META) != + 0; /* Only one base inode is allowed per fop, so there shouldn't be * overwrites here. */ @@ -999,8 +988,10 @@ void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc, goto update_query; } - ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already " - "acquired", lock, loc->inode); + ec_trace("LOCK_INODELK", fop, + "lock=%p, inode=%p. 
Lock already " + "acquired", + lock, loc->inode); goto insert; } @@ -1026,14 +1017,16 @@ unlock: UNLOCK(&loc->inode->lock); } -void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags, - off_t fl_start, size_t fl_size) +void +ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags, + off_t fl_start, size_t fl_size) { ec_lock_prepare_inode_internal(fop, loc, flags, NULL, fl_start, fl_size); } -void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base, - uint32_t flags) +void +ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base, + uint32_t flags) { loc_t tmp; int32_t err; @@ -1052,7 +1045,7 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base, if ((flags & EC_INODE_SIZE) != 0) { flags ^= EC_INODE_SIZE; } else { - base = NULL; + base = NULL; } ec_lock_prepare_inode_internal(fop, &tmp, flags, base, 0, LLONG_MAX); @@ -1060,8 +1053,9 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base, loc_wipe(&tmp); } -void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, - off_t fl_start, size_t fl_size) +void +ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, off_t fl_start, + size_t fl_size) { loc_t loc; int32_t err; @@ -1083,15 +1077,14 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, } gf_boolean_t -ec_config_check (xlator_t *xl, ec_config_t *config) +ec_config_check(xlator_t *xl, ec_config_t *config) { ec_t *ec; ec = xl->private; if ((config->version != EC_CONFIG_VERSION) || (config->algorithm != EC_CONFIG_ALGORITHM) || - (config->gf_word_size != EC_GF_BITS) || - (config->bricks != ec->nodes) || + (config->gf_word_size != EC_GF_BITS) || (config->bricks != ec->nodes) || (config->redundancy != ec->redundancy) || (config->chunk_size != EC_METHOD_CHUNK_SIZE)) { uint32_t data_bricks; @@ -1110,20 +1103,17 @@ ec_config_check (xlator_t *xl, ec_config_t *config) if ((config->redundancy < 1) || (config->redundancy * 2 >= config->bricks) || !ec_is_power_of_2(config->gf_word_size) || - ((config->chunk_size * 8) % (config->gf_word_size * data_bricks) - != 0)) { - gf_msg (xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_INVALID_CONFIG, - "Invalid or corrupted config"); + ((config->chunk_size * 8) % (config->gf_word_size * data_bricks) != + 0)) { + gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG, + "Invalid or corrupted config"); } else { - gf_msg (xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_INVALID_CONFIG, - "Unsupported config " - "(V=%u, A=%u, W=%u, " - "N=%u, R=%u, S=%u)", - config->version, config->algorithm, - config->gf_word_size, config->bricks, - config->redundancy, config->chunk_size); + gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG, + "Unsupported config " + "(V=%u, A=%u, W=%u, " + "N=%u, R=%u, S=%u)", + config->version, config->algorithm, config->gf_word_size, + config->bricks, config->redundancy, config->chunk_size); } return _gf_false; @@ -1133,20 +1123,18 @@ ec_config_check (xlator_t *xl, ec_config_t *config) } gf_boolean_t -ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, - uint64_t *dirty) +ec_set_dirty_flag(ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty) { - gf_boolean_t set_dirty = _gf_false; if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) { - if (!link->optimistic_changelog) - dirty[EC_DATA_TXN] = 1; + if (!link->optimistic_changelog) + dirty[EC_DATA_TXN] = 1; } if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) { - if (!link->optimistic_changelog) - 
dirty[EC_METADATA_TXN] = 1; + if (!link->optimistic_changelog) + dirty[EC_METADATA_TXN] = 1; } if (dirty[EC_METADATA_TXN] || dirty[EC_DATA_TXN]) { @@ -1157,9 +1145,9 @@ ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, } int32_t -ec_prepare_update_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +ec_prepare_update_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { struct list_head list; ec_fop_data_t *fop = cookie, *parent, *tmp; @@ -1179,95 +1167,87 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, LOCK(&lock->loc.inode->lock); - list_for_each_entry(link, &lock->owners, owner_list) { + list_for_each_entry(link, &lock->owners, owner_list) + { if ((link->waiting_flags & provided_flags) != 0) { link->waiting_flags ^= (link->waiting_flags & provided_flags); if (EC_NEEDED_FLAGS(link->waiting_flags) == 0) - list_add_tail(&link->fop->cbk_list, &list); + list_add_tail(&link->fop->cbk_list, &list); } } if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - EC_MSG_SIZE_VERS_GET_FAIL, - "Failed to get size and version : %s", - ec_msg_str(fop)); + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_SIZE_VERS_GET_FAIL, + "Failed to get size and version : %s", ec_msg_str(fop)); goto unlock; } if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) { - op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, - ctx->pre_version, - EC_VERSION_SIZE); + op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, ctx->pre_version, + EC_VERSION_SIZE); + if (op_errno != 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + EC_MSG_VER_XATTR_GET_FAIL, "Unable to get version xattr. %s", + ec_msg_str(fop)); + goto unlock; + } + ctx->post_version[0] += ctx->pre_version[0]; + ctx->post_version[1] += ctx->pre_version[1]; + + ctx->have_version = _gf_true; + + if (lock->loc.inode->ia_type == IA_IFREG || + lock->loc.inode->ia_type == IA_INVAL) { + op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, &ctx->pre_size); if (op_errno != 0) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - EC_MSG_VER_XATTR_GET_FAIL, - "Unable to get version xattr. %s", - ec_msg_str(fop)); - goto unlock; + if (lock->loc.inode->ia_type == IA_IFREG) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + EC_MSG_SIZE_XATTR_GET_FAIL, + "Unable to get size xattr. %s", ec_msg_str(fop)); + goto unlock; + } + } else { + ctx->post_size = ctx->pre_size; + + ctx->have_size = _gf_true; } - ctx->post_version[0] += ctx->pre_version[0]; - ctx->post_version[1] += ctx->pre_version[1]; - ctx->have_version = _gf_true; + op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, &ctx->config); + if (op_errno != 0) { + if ((lock->loc.inode->ia_type == IA_IFREG) || + (op_errno != ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + EC_MSG_CONFIG_XATTR_GET_FAIL, + "Unable to get config xattr. %s", ec_msg_str(fop)); - if (lock->loc.inode->ia_type == IA_IFREG || - lock->loc.inode->ia_type == IA_INVAL) { - op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, - &ctx->pre_size); - if (op_errno != 0) { - if (lock->loc.inode->ia_type == IA_IFREG) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - EC_MSG_SIZE_XATTR_GET_FAIL, - "Unable to get size xattr. 
%s", - ec_msg_str(fop)); - goto unlock; - } - } else { - ctx->post_size = ctx->pre_size; - - ctx->have_size = _gf_true; + goto unlock; } + } else { + if (!ec_config_check(parent->xl, &ctx->config)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + EC_MSG_CONFIG_XATTR_INVALID, "Invalid config xattr"); - op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, - &ctx->config); - if (op_errno != 0) { - if ((lock->loc.inode->ia_type == IA_IFREG) || - (op_errno != ENODATA)) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - EC_MSG_CONFIG_XATTR_GET_FAIL, - "Unable to get config xattr. %s", - ec_msg_str(fop)); - - goto unlock; - } - } else { - if (!ec_config_check(parent->xl, &ctx->config)) { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_CONFIG_XATTR_INVALID, - "Invalid config xattr"); - - op_errno = EINVAL; - - goto unlock; - } - ctx->have_config = _gf_true; + op_errno = EINVAL; + + goto unlock; } + ctx->have_config = _gf_true; } - ctx->have_info = _gf_true; + } + ctx->have_info = _gf_true; } - ec_set_dirty_flag (fop->data, ctx, dirty); + ec_set_dirty_flag(fop->data, ctx, dirty); if (dirty[EC_METADATA_TXN] && (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) { - GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]); - ctx->dirty[EC_METADATA_TXN] = 1; + GF_ASSERT(!ctx->dirty[EC_METADATA_TXN]); + ctx->dirty[EC_METADATA_TXN] = 1; } if (dirty[EC_DATA_TXN] && (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) { - GF_ASSERT (!ctx->dirty[EC_DATA_TXN]); - ctx->dirty[EC_DATA_TXN] = 1; + GF_ASSERT(!ctx->dirty[EC_DATA_TXN]); + ctx->dirty[EC_DATA_TXN] = 1; } op_errno = 0; unlock: @@ -1279,20 +1259,20 @@ unlock: * it dirty and update versions right away if dirty was not set before. */ if (lock->good_mask & ~(fop->good | fop->remaining)) { - release = _gf_true; + release = _gf_true; } if (parent_link->update[0] && !parent_link->dirty[0]) { - lock->release |= release; + lock->release |= release; } if (parent_link->update[1] && !parent_link->dirty[1]) { - lock->release |= release; + lock->release |= release; } /* We don't allow the main fop to be executed on bricks that have not * succeeded the initial xattrop. 
*/ - ec_lock_update_good (lock, fop); + ec_lock_update_good(lock, fop); /*As of now only data healing marks bricks as healing*/ lock->healing |= fop->healing; @@ -1308,7 +1288,7 @@ unlock: tmp->mask &= fop->good; /*As of now only data healing marks bricks as healing*/ - if (ec_is_data_fop (tmp->id)) { + if (ec_is_data_fop(tmp->id)) { tmp->healing |= fop->healing; } } @@ -1322,52 +1302,53 @@ unlock: static gf_boolean_t ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag) { - uint64_t current; + uint64_t current; - link->waiting_flags |= EC_FLAG_NEEDS(flag); + link->waiting_flags |= EC_FLAG_NEEDS(flag); - current = EC_NEEDED_FLAGS(lock->waiting_flags); - if (!EC_FLAGS_HAVE(current, flag)) { - lock->waiting_flags |= EC_FLAG_NEEDS(flag); - link->waiting_flags |= EC_FLAG_PROVIDES(flag); + current = EC_NEEDED_FLAGS(lock->waiting_flags); + if (!EC_FLAGS_HAVE(current, flag)) { + lock->waiting_flags |= EC_FLAG_NEEDS(flag); + link->waiting_flags |= EC_FLAG_PROVIDES(flag); - return _gf_true; - } + return _gf_true; + } - return _gf_false; + return _gf_false; } static uint64_t -ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link, - uint64_t *dirty) +ec_set_xattrop_flags_and_params(ec_lock_t *lock, ec_lock_link_t *link, + uint64_t *dirty) { - uint64_t oldflags = 0; - uint64_t newflags = 0; - ec_inode_t *ctx = lock->ctx; + uint64_t oldflags = 0; + uint64_t newflags = 0; + ec_inode_t *ctx = lock->ctx; - oldflags = EC_NEEDED_FLAGS(lock->waiting_flags); + oldflags = EC_NEEDED_FLAGS(lock->waiting_flags); - if (lock->query && !ctx->have_info) { - ec_set_needed_flag(lock, link, EC_FLAG_XATTROP); - } + if (lock->query && !ctx->have_info) { + ec_set_needed_flag(lock, link, EC_FLAG_XATTROP); + } - if (dirty[EC_DATA_TXN]) { - if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) { - dirty[EC_DATA_TXN] = 0; - } + if (dirty[EC_DATA_TXN]) { + if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) { + dirty[EC_DATA_TXN] = 0; } + } - if (dirty[EC_METADATA_TXN]) { - if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) { - dirty[EC_METADATA_TXN] = 0; - } + if (dirty[EC_METADATA_TXN]) { + if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) { + dirty[EC_METADATA_TXN] = 0; } - newflags = EC_NEEDED_FLAGS(lock->waiting_flags); + } + newflags = EC_NEEDED_FLAGS(lock->waiting_flags); - return oldflags ^ newflags; + return oldflags ^ newflags; } -void ec_get_size_version(ec_lock_link_t *link) +void +ec_get_size_version(ec_lock_link_t *link) { loc_t loc; ec_lock_t *lock; @@ -1375,7 +1356,7 @@ void ec_get_size_version(ec_lock_link_t *link) ec_fop_data_t *fop; dict_t *dict = NULL; dict_t *xdata = NULL; - ec_t *ec = NULL; + ec_t *ec = NULL; int32_t error = 0; gf_boolean_t set_dirty = _gf_false; uint64_t allzero[EC_VERSION_SIZE] = {0, 0}; @@ -1383,18 +1364,18 @@ void ec_get_size_version(ec_lock_link_t *link) lock = link->lock; ctx = lock->ctx; fop = link->fop; - ec = fop->xl->private; + ec = fop->xl->private; uint64_t changed_flags = 0; - if (ec->optimistic_changelog && - !(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id)) - link->optimistic_changelog = _gf_true; + if (ec->optimistic_changelog && !(ec->node_mask & ~link->lock->good_mask) && + !ec_is_data_fop(fop->id)) + link->optimistic_changelog = _gf_true; - set_dirty = ec_set_dirty_flag (link, ctx, dirty); + set_dirty = ec_set_dirty_flag(link, ctx, dirty); /* If ec metadata has already been retrieved, do not try again. 
*/ if (ctx->have_info && (!set_dirty)) { - if (ec_is_data_fop (fop->id)) { + if (ec_is_data_fop(fop->id)) { fop->healing |= lock->healing; } return; @@ -1402,24 +1383,23 @@ void ec_get_size_version(ec_lock_link_t *link) /* Determine if there's something we need to retrieve for the current * operation. */ - if (!set_dirty && !lock->query && - (lock->loc.inode->ia_type != IA_IFREG) && + if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) && (lock->loc.inode->ia_type != IA_INVAL)) { - return; + return; } memset(&loc, 0, sizeof(loc)); LOCK(&lock->loc.inode->lock); - changed_flags = ec_set_xattrop_flags_and_params (lock, link, dirty); + changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty); if (link->waiting_flags) { - /* This fop needs to wait until all its flags are cleared which - * potentially can be cleared by other xattrops that are already - * wound*/ - ec_sleep(fop); + /* This fop needs to wait until all its flags are cleared which + * potentially can be cleared by other xattrops that are already + * wound*/ + ec_sleep(fop); } else { - GF_ASSERT (!changed_flags); + GF_ASSERT(!changed_flags); } UNLOCK(&lock->loc.inode->lock); @@ -1434,40 +1414,38 @@ void ec_get_size_version(ec_lock_link_t *link) } if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) { - /* Once we know that an xattrop will be needed, - * we try to get all available information in a - * single call. */ - error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero, - EC_VERSION_SIZE); + /* Once we know that an xattrop will be needed, + * we try to get all available information in a + * single call. */ + error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero, + EC_VERSION_SIZE); + if (error != 0) { + goto out; + } + + if (lock->loc.inode->ia_type == IA_IFREG || + lock->loc.inode->ia_type == IA_INVAL) { + error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0); + if (error == 0) { + error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0); + } if (error != 0) { goto out; } - if (lock->loc.inode->ia_type == IA_IFREG || - lock->loc.inode->ia_type == IA_INVAL) { - error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0); - if (error == 0) { - error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0); - } - if (error != 0) { - goto out; - } - - xdata = dict_new(); - if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) { - error = -ENOMEM; - goto out; - } - + xdata = dict_new(); + if (xdata == NULL || dict_set_int32(xdata, GF_GET_SIZE, 1)) { + error = -ENOMEM; + goto out; } + } } - if (memcmp (allzero, dirty, sizeof (allzero))) { - error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, - EC_VERSION_SIZE); - if (error != 0) { - goto out; - } + if (memcmp(allzero, dirty, sizeof(allzero))) { + error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); + if (error != 0) { + goto out; + } } fop->frame->root->uid = 0; @@ -1495,13 +1473,13 @@ void ec_get_size_version(ec_lock_link_t *link) loc.name = NULL; } - ec_xattrop (fop->frame, fop->xl, fop->mask, fop->minimum, - ec_prepare_update_cbk, link, &loc, - GF_XATTROP_ADD_ARRAY64, dict, xdata); + ec_xattrop(fop->frame, fop->xl, fop->mask, fop->minimum, + ec_prepare_update_cbk, link, &loc, GF_XATTROP_ADD_ARRAY64, + dict, xdata); } else { ec_fxattrop(fop->frame, fop->xl, fop->mask, fop->minimum, - ec_prepare_update_cbk, link, lock->fd, - GF_XATTROP_ADD_ARRAY64, dict, xdata); + ec_prepare_update_cbk, link, lock->fd, + GF_XATTROP_ADD_ARRAY64, dict, xdata); } error = 0; @@ -1526,8 +1504,7 @@ out: } gf_boolean_t -__ec_get_inode_size(ec_fop_data_t *fop, inode_t 
*inode, - uint64_t *size) +__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size) { ec_inode_t *ctx; gf_boolean_t found = _gf_false; @@ -1547,14 +1524,13 @@ out: } gf_boolean_t -ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, - uint64_t *size) +ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size) { gf_boolean_t found = _gf_false; LOCK(&inode->lock); { - found = __ec_get_inode_size (fop, inode, size); + found = __ec_get_inode_size(fop, inode, size); } UNLOCK(&inode->lock); @@ -1562,8 +1538,7 @@ ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, } gf_boolean_t -__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, - uint64_t size) +__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size) { ec_inode_t *ctx; gf_boolean_t found = _gf_false; @@ -1590,38 +1565,37 @@ out: } gf_boolean_t -ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, - uint64_t size) +ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size) { gf_boolean_t found = _gf_false; - LOCK (&inode->lock); + LOCK(&inode->lock); { - found = __ec_set_inode_size (fop, inode, size); + found = __ec_set_inode_size(fop, inode, size); } - UNLOCK (&inode->lock); + UNLOCK(&inode->lock); return found; } static void -ec_release_stripe_cache (ec_inode_t *ctx) +ec_release_stripe_cache(ec_inode_t *ctx) { - ec_stripe_list_t *stripe_cache = NULL; - ec_stripe_t *stripe = NULL; + ec_stripe_list_t *stripe_cache = NULL; + ec_stripe_t *stripe = NULL; - stripe_cache = &ctx->stripe_cache; - while (!list_empty (&stripe_cache->lru)) { - stripe = list_first_entry (&stripe_cache->lru, ec_stripe_t, - lru); - list_del (&stripe->lru); - GF_FREE (stripe); - } - stripe_cache->count = 0; - stripe_cache->max = 0; + stripe_cache = &ctx->stripe_cache; + while (!list_empty(&stripe_cache->lru)) { + stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru); + list_del(&stripe->lru); + GF_FREE(stripe); + } + stripe_cache->count = 0; + stripe_cache->max = 0; } -void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode) +void +ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode) { ec_inode_t *ctx; @@ -1632,7 +1606,7 @@ void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode) goto unlock; } - ec_release_stripe_cache (ctx); + ec_release_stripe_cache(ctx); ctx->have_info = _gf_false; ctx->have_config = _gf_false; ctx->have_version = _gf_false; @@ -1648,10 +1622,10 @@ unlock: UNLOCK(&inode->lock); } -int32_t ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +int32_t +ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { ec_fop_data_t *fop = cookie; ec_lock_link_t *link; @@ -1675,7 +1649,8 @@ int32_t ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * Any error processing this request is ignored. In the worst case, an invalid * or not up to date value in the iatt could cause some cache invalidation. 
*/ -void ec_get_real_size(ec_lock_link_t *link) +void +ec_get_real_size(ec_lock_link_t *link) { ec_fop_data_t *fop; dict_t *xdata; @@ -1725,24 +1700,26 @@ ec_lock_update_fd(ec_lock_t *lock, ec_fop_data_t *fop) } static gf_boolean_t -ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check) +ec_link_has_lock_conflict(ec_lock_link_t *link, gf_boolean_t waitlist_check) { - ec_lock_link_t *trav_link = NULL; + ec_lock_link_t *trav_link = NULL; - list_for_each_entry (trav_link, &link->lock->owners, owner_list) { - if (ec_lock_conflict (trav_link, link)) - return _gf_true; - } + list_for_each_entry(trav_link, &link->lock->owners, owner_list) + { + if (ec_lock_conflict(trav_link, link)) + return _gf_true; + } - if (!waitlist_check) - return _gf_false; + if (!waitlist_check) + return _gf_false; - list_for_each_entry (trav_link, &link->lock->waiting, wait_list) { - if (ec_lock_conflict (trav_link, link)) - return _gf_true; - } + list_for_each_entry(trav_link, &link->lock->waiting, wait_list) + { + if (ec_lock_conflict(trav_link, link)) + return _gf_true; + } - return _gf_false; + return _gf_false; } static void @@ -1763,7 +1740,7 @@ ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list) /* If the fop is not shareable, only this fop can be assigned as owner. * Other fops will need to wait until this one finishes. */ - if (ec_link_has_lock_conflict (link, _gf_false)) { + if (ec_link_has_lock_conflict(link, _gf_false)) { conflict = _gf_true; } @@ -1794,7 +1771,8 @@ ec_lock_apply(ec_lock_link_t *link) ec_get_real_size(link); } -gf_boolean_t ec_lock_acquire(ec_lock_link_t *link); +gf_boolean_t +ec_lock_acquire(ec_lock_link_t *link); static void ec_lock_resume_shared(struct list_head *list) @@ -1818,7 +1796,8 @@ ec_lock_resume_shared(struct list_head *list) } } -void ec_lock_acquired(ec_lock_link_t *link) +void +ec_lock_acquired(ec_lock_link_t *link) { struct list_head list; ec_lock_t *lock; @@ -1850,8 +1829,9 @@ void ec_lock_acquired(ec_lock_link_t *link) ec_lock_resume_shared(&list); } -int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +int32_t +ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { ec_fop_data_t *fop = cookie; ec_lock_link_t *link = NULL; @@ -1866,15 +1846,15 @@ int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, ec_lock_acquired(link); ec_lock(fop->parent); } else { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - EC_MSG_PREOP_LOCK_FAILED, - "Failed to complete preop lock"); + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED, + "Failed to complete preop lock"); } return 0; } -gf_boolean_t ec_lock_acquire(ec_lock_link_t *link) +gf_boolean_t +ec_lock_acquire(ec_lock_link_t *link) { ec_lock_t *lock; ec_fop_data_t *fop; @@ -1907,62 +1887,62 @@ gf_boolean_t ec_lock_acquire(ec_lock_link_t *link) static ec_lock_link_t * ec_lock_timer_cancel(xlator_t *xl, ec_lock_t *lock) { - ec_lock_link_t *timer_link; + ec_lock_link_t *timer_link; - /* If we don't have any timer, there's nothing to cancel. */ - if (lock->timer == NULL) { - return NULL; - } + /* If we don't have any timer, there's nothing to cancel. */ + if (lock->timer == NULL) { + return NULL; + } - /* We are trying to access a lock that has an unlock timer active. - * This means that the lock must be idle, i.e. no fop can be in the - * owner, waiting or frozen lists. 
It also means that the lock cannot - * have been marked as being released (this is done without timers). - * There should only be one owner reference, but it's possible that - * some fops are being prepared to use this lock. */ - GF_ASSERT ((lock->refs_owners == 1) && - list_empty(&lock->owners) && list_empty(&lock->waiting)); - - /* We take the timer_link before cancelling the timer, since a - * successful cancellation will destroy it. It must not be NULL - * because it references the fop responsible for the delayed unlock - * that we are currently trying to cancel. */ - timer_link = lock->timer->data; - GF_ASSERT(timer_link != NULL); - - if (gf_timer_call_cancel(xl->ctx, lock->timer) < 0) { - /* It's too late to avoid the execution of the timer callback. - * Since we need to be sure that the callback has access to all - * needed resources, we cannot resume the execution of the - * timer fop now. This will be done in the callback. */ - timer_link = NULL; - } else { - /* The timer has been cancelled. The fop referenced by - * timer_link holds the last reference. The caller is - * responsible to release it when not needed anymore. */ - ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock); - } + /* We are trying to access a lock that has an unlock timer active. + * This means that the lock must be idle, i.e. no fop can be in the + * owner, waiting or frozen lists. It also means that the lock cannot + * have been marked as being released (this is done without timers). + * There should only be one owner reference, but it's possible that + * some fops are being prepared to use this lock. */ + GF_ASSERT((lock->refs_owners == 1) && list_empty(&lock->owners) && + list_empty(&lock->waiting)); + + /* We take the timer_link before cancelling the timer, since a + * successful cancellation will destroy it. It must not be NULL + * because it references the fop responsible for the delayed unlock + * that we are currently trying to cancel. */ + timer_link = lock->timer->data; + GF_ASSERT(timer_link != NULL); + + if (gf_timer_call_cancel(xl->ctx, lock->timer) < 0) { + /* It's too late to avoid the execution of the timer callback. + * Since we need to be sure that the callback has access to all + * needed resources, we cannot resume the execution of the + * timer fop now. This will be done in the callback. */ + timer_link = NULL; + } else { + /* The timer has been cancelled. The fop referenced by + * timer_link holds the last reference. The caller is + * responsible to release it when not needed anymore. */ + ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock); + } - /* We have two options here: - * - * 1. The timer has been successfully cancelled. - * - * This is the easiest case and we can continue with the currently - * acquired lock. - * - * 2. The timer callback has already been fired. - * - * In this case we have not been able to cancel the timer before - * the timer callback has been fired, but we also know that - * lock->timer != NULL. This means that the timer callback is still - * trying to acquire the inode mutex that we currently own. We are - * safe until we release it. In this case we can safely clear - * lock->timer. This will cause that the timer callback does nothing - * once it acquires the mutex. - */ - lock->timer = NULL; + /* We have two options here: + * + * 1. The timer has been successfully cancelled. + * + * This is the easiest case and we can continue with the currently + * acquired lock. + * + * 2. The timer callback has already been fired. 
+ * + * In this case we have not been able to cancel the timer before + * the timer callback has been fired, but we also know that + * lock->timer != NULL. This means that the timer callback is still + * trying to acquire the inode mutex that we currently own. We are + * safe until we release it. In this case we can safely clear + * lock->timer. This will cause that the timer callback does nothing + * once it acquires the mutex. + */ + lock->timer = NULL; - return timer_link; + return timer_link; } static gf_boolean_t @@ -1984,7 +1964,7 @@ ec_lock_assign_owner(ec_lock_link_t *link) /* Since the link has just been prepared but it's not active yet, the * refs_pending must be one at least (the ref owned by this link). */ - GF_ASSERT (lock->refs_pending > 0); + GF_ASSERT(lock->refs_pending > 0); /* The link is not pending any more. It will be assigned to the owner, * waiting or frozen list. */ lock->refs_pending--; @@ -2017,7 +1997,7 @@ ec_lock_assign_owner(ec_lock_link_t *link) * owners, or waiters(to prevent starvation). * Otherwise we need to wait. */ - if (!lock->acquired || ec_link_has_lock_conflict (link, _gf_true)) { + if (!lock->acquired || ec_link_has_lock_conflict(link, _gf_true)) { ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock); list_add_tail(&link->wait_list, &lock->waiting); @@ -2032,7 +2012,7 @@ ec_lock_assign_owner(ec_lock_link_t *link) * reference assigned to the timer fop. In this case we simply reuse it. * Otherwise we need to increase the number of owners. */ if (timer_link == NULL) { - lock->refs_owners++; + lock->refs_owners++; } assigned = _gf_true; @@ -2090,14 +2070,14 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk, /* If the fop fails on any of the good bricks, it is important to mark * it dirty and update versions right away. 
*/ if (link->update[0] || link->update[1]) { - if (lock->good_mask & ~(fop->good | fop->remaining)) { - lock->release = _gf_true; - } + if (lock->good_mask & ~(fop->good | fop->remaining)) { + lock->release = _gf_true; + } } } if (fop->healing) { - lock->healing = fop->healing & (fop->good | fop->remaining); + lock->healing = fop->healing & (fop->good | fop->remaining); } ec_lock_update_good(lock, fop); @@ -2108,7 +2088,8 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk, ec_lock_resume_shared(&list); } -void ec_lock(ec_fop_data_t *fop) +void +ec_lock(ec_fop_data_t *fop) { ec_lock_link_t *link; @@ -2116,7 +2097,7 @@ void ec_lock(ec_fop_data_t *fop) * Which can result in refs == 0 for fop leading to use after free in this * function when it calls ec_sleep so do ec_sleep at start and ec_resume at * the end of this function.*/ - ec_sleep (fop); + ec_sleep(fop); while (fop->locked < fop->lock_count) { /* Since there are only up to 2 locks per fop, this xor will change @@ -2186,17 +2167,16 @@ ec_lock_unfreeze(ec_lock_link_t *link) } } -int32_t ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +int32_t +ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { ec_fop_data_t *fop = cookie; ec_lock_link_t *link = fop->data; if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - EC_MSG_UNLOCK_FAILED, - "entry/inode unlocking failed (%s)", - ec_fop_name(link->fop->id)); + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED, + "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id)); } else { ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock); } @@ -2206,7 +2186,8 @@ int32_t ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -void ec_unlock_lock(ec_lock_link_t *link) +void +ec_unlock_lock(ec_lock_link_t *link) { ec_lock_t *lock; ec_fop_data_t *fop; @@ -2232,10 +2213,10 @@ void ec_unlock_lock(ec_lock_link_t *link) } } -int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie, - xlator_t * this, int32_t op_ret, - int32_t op_errno, dict_t * xattr, - dict_t * xdata) +int32_t +ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) { ec_fop_data_t *fop = cookie; ec_lock_link_t *link; @@ -2247,10 +2228,9 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie, ctx = lock->ctx; if (op_ret < 0) { - gf_msg(fop->xl->name, fop_log_level (fop->id, op_errno), op_errno, + gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno, EC_MSG_SIZE_VERS_UPDATE_FAIL, - "Failed to update version and size. %s", - ec_msg_str(fop)); + "Failed to update version and size. 
%s", ec_msg_str(fop)); } else { fop->parent->good &= fop->good; @@ -2287,14 +2267,14 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie, } void -ec_update_size_version(ec_lock_link_t *link, uint64_t *version, - uint64_t size, uint64_t *dirty) +ec_update_size_version(ec_lock_link_t *link, uint64_t *version, uint64_t size, + uint64_t *dirty) { ec_fop_data_t *fop; ec_lock_t *lock; ec_inode_t *ctx; dict_t *dict = NULL; - uintptr_t update_on = 0; + uintptr_t update_on = 0; int32_t err = -ENOMEM; fop = link->fop; @@ -2331,11 +2311,10 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version, } if (dirty[0] || dirty[1]) { - err = ec_dict_set_array(dict, EC_XATTR_DIRTY, - dirty, EC_VERSION_SIZE); - if (err != 0) { - goto out; - } + err = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); + if (err != 0) { + goto out; + } } /* If config information is not known, we request it now. */ @@ -2351,13 +2330,13 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version, update_on = lock->good_mask | lock->healing; if (link->lock->fd == NULL) { - ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN, - ec_update_size_version_done, link, &link->lock->loc, - GF_XATTROP_ADD_ARRAY64, dict, NULL); + ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN, + ec_update_size_version_done, link, &link->lock->loc, + GF_XATTROP_ADD_ARRAY64, dict, NULL); } else { - ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN, - ec_update_size_version_done, link, link->lock->fd, - GF_XATTROP_ADD_ARRAY64, dict, NULL); + ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN, + ec_update_size_version_done, link, link->lock->fd, + GF_XATTROP_ADD_ARRAY64, dict, NULL); } fop->frame->root->uid = fop->uid; @@ -2374,14 +2353,12 @@ out: ec_fop_set_error(fop, -err); - gf_msg (fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL, - "Unable to update version and size. %s", - ec_msg_str(fop)); + gf_msg(fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL, + "Unable to update version and size. %s", ec_msg_str(fop)); if (lock->unlock_now) { ec_unlock_lock(fop->data); } - } gf_boolean_t @@ -2406,34 +2383,34 @@ ec_update_info(ec_lock_link_t *link) /* If we set the dirty flag for update fop, we have to unset it. * If fop has failed on some bricks, leave the dirty as marked. 
*/ if (lock->unlock_now) { - /* Ensure that nodes are up while doing final - * metadata update.*/ - if (!(ec->node_mask & ~lock->good_mask) && - !(ec->node_mask & ~ec->xl_up)) { - if (ctx->dirty[0] != 0) { - dirty[0] = -1; - } - if (ctx->dirty[1] != 0) { - dirty[1] = -1; - } - /*If everything is fine and we already - *have version xattr set on entry, there - *is no need to update version again*/ - if (ctx->pre_version[0]) { - version[0] = 0; - } - if (ctx->pre_version[1]) { - version[1] = 0; - } - } else { - link->optimistic_changelog = _gf_false; - ec_set_dirty_flag (link, ctx, dirty); + /* Ensure that nodes are up while doing final + * metadata update.*/ + if (!(ec->node_mask & ~lock->good_mask) && + !(ec->node_mask & ~ec->xl_up)) { + if (ctx->dirty[0] != 0) { + dirty[0] = -1; + } + if (ctx->dirty[1] != 0) { + dirty[1] = -1; } - memset(ctx->dirty, 0, sizeof(ctx->dirty)); + /*If everything is fine and we already + *have version xattr set on entry, there + *is no need to update version again*/ + if (ctx->pre_version[0]) { + version[0] = 0; + } + if (ctx->pre_version[1]) { + version[1] = 0; + } + } else { + link->optimistic_changelog = _gf_false; + ec_set_dirty_flag(link, ctx, dirty); + } + memset(ctx->dirty, 0, sizeof(ctx->dirty)); } - if ((version[0] != 0) || (version[1] != 0) || - (dirty[0] != 0) || (dirty[1] != 0)) { + if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) || + (dirty[1] != 0)) { ec_update_size_version(link, version, size, dirty); return _gf_true; } @@ -2464,185 +2441,185 @@ ec_unlock_now(ec_lock_link_t *link) void ec_lock_release(ec_t *ec, inode_t *inode) { - ec_lock_t *lock; - ec_inode_t *ctx; - ec_lock_link_t *timer_link = NULL; + ec_lock_t *lock; + ec_inode_t *ctx; + ec_lock_link_t *timer_link = NULL; - LOCK(&inode->lock); + LOCK(&inode->lock); - ctx = __ec_inode_get(inode, ec->xl); - if (ctx == NULL) { - goto done; - } - lock = ctx->inode_lock; - if ((lock == NULL) || !lock->acquired || lock->release) { - goto done; - } + ctx = __ec_inode_get(inode, ec->xl); + if (ctx == NULL) { + goto done; + } + lock = ctx->inode_lock; + if ((lock == NULL) || !lock->acquired || lock->release) { + goto done; + } - gf_msg_debug(ec->xl->name, 0, - "Releasing inode %p due to lock contention", inode); + gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention", + inode); - /* The lock is not marked to be released, so the frozen list should be - * empty. */ - GF_ASSERT(list_empty(&lock->frozen)); + /* The lock is not marked to be released, so the frozen list should be + * empty. */ + GF_ASSERT(list_empty(&lock->frozen)); - timer_link = ec_lock_timer_cancel(ec->xl, lock); + timer_link = ec_lock_timer_cancel(ec->xl, lock); - /* We mark the lock to be released as soon as possible. */ - lock->release = _gf_true; + /* We mark the lock to be released as soon as possible. */ + lock->release = _gf_true; done: - UNLOCK(&inode->lock); - - /* If we have cancelled the timer, we need to start the unlock of the - * inode. If there was a timer but we have been unable to cancel it - * because it was just triggered, the timer callback will take care - * of releasing the inode. */ - if (timer_link != NULL) { - ec_unlock_now(timer_link); - } + UNLOCK(&inode->lock); + + /* If we have cancelled the timer, we need to start the unlock of the + * inode. If there was a timer but we have been unable to cancel it + * because it was just triggered, the timer callback will take care + * of releasing the inode. 
*/ + if (timer_link != NULL) { + ec_unlock_now(timer_link); + } } -void ec_unlock_timer_add(ec_lock_link_t *link); +void +ec_unlock_timer_add(ec_lock_link_t *link); void ec_unlock_timer_del(ec_lock_link_t *link) { - ec_lock_t *lock; - inode_t *inode; - gf_boolean_t now = _gf_false; + ec_lock_t *lock; + inode_t *inode; + gf_boolean_t now = _gf_false; + + /* If we are here, it means that the timer has expired before having + * been cancelled. This guarantees that 'link' is still valid because + * the fop that contains it must be pending (if timer cancellation in + * ec_lock_assign_owner() fails, the fop is left sleeping). + * + * At the same time, the fop still has a reference to the lock, so + * it must also be valid. + */ + lock = link->lock; + + /* 'lock' must have a valid inode since it can only be destroyed + * when the lock itself is destroyed, but we have a reference to the + * lock to avoid this. + */ + inode = lock->loc.inode; + + LOCK(&inode->lock); + + if (lock->timer != NULL) { + ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock); + + /* The unlock timer has expired without anyone cancelling it. + * This means that it shouldn't have any owner, and the waiting + * and frozen lists should be empty. It must have only one + * owner reference, but there can be fops being prepared + * though. + * */ + GF_ASSERT(!lock->release && (lock->refs_owners == 1) && + list_empty(&lock->owners) && list_empty(&lock->waiting) && + list_empty(&lock->frozen)); + + gf_timer_call_cancel(link->fop->xl->ctx, lock->timer); + lock->timer = NULL; + + /* Any fop being processed from now on, will need to wait + * until the next unlock/lock cycle. */ + lock->release = now = _gf_true; + } + + UNLOCK(&inode->lock); - /* If we are here, it means that the timer has expired before having - * been cancelled. This guarantees that 'link' is still valid because - * the fop that contains it must be pending (if timer cancellation in - * ec_lock_assign_owner() fails, the fop is left sleeping). + if (now) { + ec_unlock_now(link); + } else { + /* The timer has been cancelled just after firing it but before + * getting here. This means that another fop has used the lock + * and everything should be handled as if this callback were + * have not been executed. However we still have an owner + * reference. + * + * We need to release our reference. If this is not the last + * reference (the most common case because another fop has + * taken another ref) we only need to decrement the counter. + * Otherwise we have been delayed enough so that the other fop + * has had time to acquire the reference, do its operation and + * release it. At the time of releasing it, the fop did found + * that the ref counter was > 1 (our reference), so the delayed + * unlock timer wasn't started. We need to start it again if we + * are the last reference. * - * At the same time, the fop still has a reference to the lock, so - * it must also be valid. + * ec_unlock_timer_add() handles both cases. */ - lock = link->lock; + ec_unlock_timer_add(link); - /* 'lock' must have a valid inode since it can only be destroyed - * when the lock itself is destroyed, but we have a reference to the - * lock to avoid this. + /* We need to resume the fop that was waiting for the delayed + * unlock. */ - inode = lock->loc.inode; - - LOCK(&inode->lock); - - if (lock->timer != NULL) { - ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock); - - /* The unlock timer has expired without anyone cancelling it. 
- * This means that it shouldn't have any owner, and the waiting - * and frozen lists should be empty. It must have only one - * owner reference, but there can be fops being prepared - * though. - * */ - GF_ASSERT(!lock->release && - (lock->refs_owners == 1) && - list_empty(&lock->owners) && - list_empty(&lock->waiting) && - list_empty(&lock->frozen)); - - gf_timer_call_cancel(link->fop->xl->ctx, lock->timer); - lock->timer = NULL; - - /* Any fop being processed from now on, will need to wait - * until the next unlock/lock cycle. */ - lock->release = now = _gf_true; - } - - UNLOCK(&inode->lock); - - if (now) { - ec_unlock_now(link); - } else { - /* The timer has been cancelled just after firing it but before - * getting here. This means that another fop has used the lock - * and everything should be handled as if this callback were - * have not been executed. However we still have an owner - * reference. - * - * We need to release our reference. If this is not the last - * reference (the most common case because another fop has - * taken another ref) we only need to decrement the counter. - * Otherwise we have been delayed enough so that the other fop - * has had time to acquire the reference, do its operation and - * release it. At the time of releasing it, the fop did found - * that the ref counter was > 1 (our reference), so the delayed - * unlock timer wasn't started. We need to start it again if we - * are the last reference. - * - * ec_unlock_timer_add() handles both cases. - */ - ec_unlock_timer_add(link); - - /* We need to resume the fop that was waiting for the delayed - * unlock. - */ - ec_resume(link->fop, 0); - } + ec_resume(link->fop, 0); + } } -void ec_unlock_timer_cbk(void *data) +void +ec_unlock_timer_cbk(void *data) { - ec_unlock_timer_del(data); + ec_unlock_timer_del(data); } static gf_boolean_t ec_eager_lock_used(ec_t *ec, ec_fop_data_t *fop) { - /* Fops with no locks at this point mean that they are sent as sub-fops - * of other higher level fops. In this case we simply assume that the - * parent fop will take correct care of the eager lock. */ - if (fop->lock_count == 0) { - return _gf_true; - } + /* Fops with no locks at this point mean that they are sent as sub-fops + * of other higher level fops. In this case we simply assume that the + * parent fop will take correct care of the eager lock. */ + if (fop->lock_count == 0) { + return _gf_true; + } - /* We may have more than one lock, but this only happens in the rename - * fop, and both locks will reference an inode of the same type (a - * directory in this case), so we only need to check the first lock. */ - if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) { - return ec->eager_lock; - } + /* We may have more than one lock, but this only happens in the rename + * fop, and both locks will reference an inode of the same type (a + * directory in this case), so we only need to check the first lock. 
*/ + if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) { + return ec->eager_lock; + } - return ec->other_eager_lock; + return ec->other_eager_lock; } static uint32_t ec_eager_lock_timeout(ec_t *ec, ec_lock_t *lock) { - if (lock->loc.inode->ia_type == IA_IFREG) { - return ec->eager_lock_timeout; - } + if (lock->loc.inode->ia_type == IA_IFREG) { + return ec->eager_lock_timeout; + } - return ec->other_eager_lock_timeout; + return ec->other_eager_lock_timeout; } static gf_boolean_t ec_lock_delay_create(ec_lock_link_t *link) { - struct timespec delay; - ec_fop_data_t *fop = link->fop; - ec_lock_t *lock = link->lock; + struct timespec delay; + ec_fop_data_t *fop = link->fop; + ec_lock_t *lock = link->lock; - delay.tv_sec = ec_eager_lock_timeout(fop->xl->private, lock); - delay.tv_nsec = 0; - lock->timer = gf_timer_call_after(fop->xl->ctx, delay, - ec_unlock_timer_cbk, link); - if (lock->timer == NULL) { - gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM, - EC_MSG_UNLOCK_DELAY_FAILED, - "Unable to delay an unlock"); + delay.tv_sec = ec_eager_lock_timeout(fop->xl->private, lock); + delay.tv_nsec = 0; + lock->timer = gf_timer_call_after(fop->xl->ctx, delay, ec_unlock_timer_cbk, + link); + if (lock->timer == NULL) { + gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM, + EC_MSG_UNLOCK_DELAY_FAILED, "Unable to delay an unlock"); - return _gf_false; - } + return _gf_false; + } - return _gf_true; + return _gf_true; } -void ec_unlock_timer_add(ec_lock_link_t *link) +void +ec_unlock_timer_add(ec_lock_link_t *link) { ec_fop_data_t *fop = link->fop; ec_lock_t *lock = link->lock; @@ -2732,7 +2709,8 @@ void ec_unlock_timer_add(ec_lock_link_t *link) } } -void ec_unlock(ec_fop_data_t *fop) +void +ec_unlock(ec_fop_data_t *fop) { int32_t i; @@ -2741,7 +2719,8 @@ void ec_unlock(ec_fop_data_t *fop) } } -void ec_flush_size_version(ec_fop_data_t * fop) +void +ec_flush_size_version(ec_fop_data_t *fop) { GF_ASSERT(fop->lock_count == 1); ec_update_info(&fop->locks[0]); @@ -2751,99 +2730,98 @@ static void ec_update_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache, ec_stripe_t *stripe, ec_fop_data_t *fop) { - off_t base; - - /* On write fops, we only update existing fragments if the write has - * succeeded. Otherwise, we remove them from the cache. */ - if ((fop->id == GF_FOP_WRITE) && (fop->answer != NULL) && - (fop->answer->op_ret >= 0)) { - base = stripe->frag_offset - fop->frag_range.first; - base *= ec->fragments; - - /* We check if the stripe offset falls inside the real region - * modified by the write fop (a write request is allowed, - * though uncommon, to write less bytes than requested). The - * current write fop implementation doesn't allow partial - * writes of fragments, so if there's no error, we are sure - * that a full stripe has been completely modified or not - * touched at all. The value of op_ret may not be a multiple - * of the stripe size because it depends on the requested - * size by the user, so we update the stripe if the write has - * modified at least one byte (meaning ec has written the full - * stripe). */ - if (base < fop->answer->op_ret + fop->head) { - memcpy(stripe->data, fop->vector[0].iov_base + base, - ec->stripe_size); - list_move_tail(&stripe->lru, &stripe_cache->lru); - - GF_ATOMIC_INC(ec->stats.stripe_cache.updates); - } - } else { - stripe->frag_offset = -1; - list_move (&stripe->lru, &stripe_cache->lru); - - GF_ATOMIC_INC(ec->stats.stripe_cache.invals); + off_t base; + + /* On write fops, we only update existing fragments if the write has + * succeeded. 
Otherwise, we remove them from the cache. */ + if ((fop->id == GF_FOP_WRITE) && (fop->answer != NULL) && + (fop->answer->op_ret >= 0)) { + base = stripe->frag_offset - fop->frag_range.first; + base *= ec->fragments; + + /* We check if the stripe offset falls inside the real region + * modified by the write fop (a write request is allowed, + * though uncommon, to write less bytes than requested). The + * current write fop implementation doesn't allow partial + * writes of fragments, so if there's no error, we are sure + * that a full stripe has been completely modified or not + * touched at all. The value of op_ret may not be a multiple + * of the stripe size because it depends on the requested + * size by the user, so we update the stripe if the write has + * modified at least one byte (meaning ec has written the full + * stripe). */ + if (base < fop->answer->op_ret + fop->head) { + memcpy(stripe->data, fop->vector[0].iov_base + base, + ec->stripe_size); + list_move_tail(&stripe->lru, &stripe_cache->lru); + + GF_ATOMIC_INC(ec->stats.stripe_cache.updates); } + } else { + stripe->frag_offset = -1; + list_move(&stripe->lru, &stripe_cache->lru); + + GF_ATOMIC_INC(ec->stats.stripe_cache.invals); + } } static void -ec_update_cached_stripes (ec_fop_data_t *fop) -{ - uint64_t first; - uint64_t last; - ec_stripe_t *stripe = NULL; - ec_inode_t *ctx = NULL; - ec_stripe_list_t *stripe_cache = NULL; - inode_t *inode = NULL; - struct list_head *temp; - struct list_head sentinel; - - first = fop->frag_range.first; - /* 'last' represents the first stripe not touched by the operation */ - last = fop->frag_range.last; - - /* If there are no modified stripes, we don't need to do anything - * else. */ - if (last <= first) { - return; - } +ec_update_cached_stripes(ec_fop_data_t *fop) +{ + uint64_t first; + uint64_t last; + ec_stripe_t *stripe = NULL; + ec_inode_t *ctx = NULL; + ec_stripe_list_t *stripe_cache = NULL; + inode_t *inode = NULL; + struct list_head *temp; + struct list_head sentinel; + + first = fop->frag_range.first; + /* 'last' represents the first stripe not touched by the operation */ + last = fop->frag_range.last; + + /* If there are no modified stripes, we don't need to do anything + * else. */ + if (last <= first) { + return; + } - if (!fop->use_fd) { - inode = fop->loc[0].inode; - } else { - inode = fop->fd->inode; - } + if (!fop->use_fd) { + inode = fop->loc[0].inode; + } else { + inode = fop->fd->inode; + } - LOCK(&inode->lock); + LOCK(&inode->lock); - ctx = __ec_inode_get (inode, fop->xl); - if (ctx == NULL) { - goto out; - } - stripe_cache = &ctx->stripe_cache; - - /* Since we'll be moving elements of the list to the tail, we might - * end in an infinite loop. To avoid it, we insert a sentinel element - * into the list, so that it will be used to detect when we have - * traversed all existing elements once. */ - list_add_tail(&sentinel, &stripe_cache->lru); - temp = stripe_cache->lru.next; - while (temp != &sentinel) { - stripe = list_entry(temp, ec_stripe_t, lru); - temp = temp->next; - if ((first <= stripe->frag_offset) && - (stripe->frag_offset < last)) { - ec_update_stripe (fop->xl->private, stripe_cache, - stripe, fop); - } + ctx = __ec_inode_get(inode, fop->xl); + if (ctx == NULL) { + goto out; + } + stripe_cache = &ctx->stripe_cache; + + /* Since we'll be moving elements of the list to the tail, we might + * end in an infinite loop. To avoid it, we insert a sentinel element + * into the list, so that it will be used to detect when we have + * traversed all existing elements once. 
*/ + list_add_tail(&sentinel, &stripe_cache->lru); + temp = stripe_cache->lru.next; + while (temp != &sentinel) { + stripe = list_entry(temp, ec_stripe_t, lru); + temp = temp->next; + if ((first <= stripe->frag_offset) && (stripe->frag_offset < last)) { + ec_update_stripe(fop->xl->private, stripe_cache, stripe, fop); } - list_del(&sentinel); + } + list_del(&sentinel); out: - UNLOCK(&inode->lock); + UNLOCK(&inode->lock); } -void ec_lock_reuse(ec_fop_data_t *fop) +void +ec_lock_reuse(ec_fop_data_t *fop) { ec_cbk_data_t *cbk; ec_t *ec = NULL; @@ -2854,13 +2832,13 @@ void ec_lock_reuse(ec_fop_data_t *fop) if (ec_eager_lock_used(ec, fop) && cbk != NULL) { if (cbk->xdata != NULL) { - if ((dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT, - &count) == 0) && (count > 1)) { + if ((dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT, &count) == + 0) && + (count > 1)) { release = _gf_true; } if (release) { - gf_msg_debug (fop->xl->name, 0, - "Lock contention detected"); + gf_msg_debug(fop->xl->name, 0, "Lock contention detected"); } } } else { @@ -2869,24 +2847,25 @@ void ec_lock_reuse(ec_fop_data_t *fop) * the lock. */ release = _gf_true; } - ec_update_cached_stripes (fop); + ec_update_cached_stripes(fop); for (i = 0; i < fop->lock_count; i++) { ec_lock_next_owner(&fop->locks[i], cbk, release); } } -void __ec_manager(ec_fop_data_t * fop, int32_t error) +void +__ec_manager(ec_fop_data_t *fop, int32_t error) { ec_t *ec = fop->xl->private; do { ec_trace("MANAGER", fop, "error=%d", error); - if (!ec_must_wind (fop)) { - if (ec->xl_up_count < ec->fragments) { - error = ENOTCONN; - } + if (!ec_must_wind(fop)) { + if (ec->xl_up_count < ec->fragments) { + error = ENOTCONN; + } } if (error != 0) { @@ -2912,20 +2891,20 @@ void __ec_manager(ec_fop_data_t * fop, int32_t error) fop->jobs = 1; fop->state = fop->handler(fop, fop->state); - GF_ASSERT (fop->state >= 0); + GF_ASSERT(fop->state >= 0); error = ec_check_complete(fop, __ec_manager); } while (error >= 0); } -void ec_manager(ec_fop_data_t * fop, int32_t error) +void +ec_manager(ec_fop_data_t *fop, int32_t error) { GF_ASSERT(fop->jobs == 0); GF_ASSERT(fop->winds == 0); GF_ASSERT(fop->error == 0); - if (fop->state == EC_STATE_START) - { + if (fop->state == EC_STATE_START) { fop->state = EC_STATE_INIT; } diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c index 33a970d922a..fae8843a679 100644 --- a/xlators/cluster/ec/src/ec-data.c +++ b/xlators/cluster/ec/src/ec-data.c @@ -14,47 +14,43 @@ #include "ec-data.h" #include "ec-messages.h" -ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this, - ec_fop_data_t * fop, int32_t id, - int32_t idx, int32_t op_ret, - int32_t op_errno) +ec_cbk_data_t * +ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop, + int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno) { - ec_cbk_data_t * cbk; - ec_t * ec = this->private; + ec_cbk_data_t *cbk; + ec_t *ec = this->private; - if (fop->xl != this) - { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_XLATOR_MISMATCH, "Mismatching xlators between request " - "and answer (req=%s, ans=%s).", fop->xl->name, this->name); + if (fop->xl != this) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_XLATOR_MISMATCH, + "Mismatching xlators between request " + "and answer (req=%s, ans=%s).", + fop->xl->name, this->name); return NULL; } - if (fop->frame != frame) - { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_FRAME_MISMATCH, "Mismatching frames between request " - "and answer (req=%p, ans=%p).", - 
fop->frame, frame); + if (fop->frame != frame) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_FRAME_MISMATCH, + "Mismatching frames between request " + "and answer (req=%p, ans=%p).", + fop->frame, frame); return NULL; } - if (fop->id != id) - { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_FOP_MISMATCH, "Mismatching fops between request " - "and answer (req=%d, ans=%d).", - fop->id, id); + if (fop->id != id) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_FOP_MISMATCH, + "Mismatching fops between request " + "and answer (req=%d, ans=%d).", + fop->id, id); return NULL; } cbk = mem_get0(ec->cbk_pool); - if (cbk == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to allocate memory for an " - "answer."); + if (cbk == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to allocate memory for an " + "answer."); return NULL; } @@ -64,7 +60,7 @@ ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this, cbk->count = 1; cbk->op_ret = op_ret; cbk->op_errno = op_errno; - INIT_LIST_HEAD (&cbk->entries.list); + INIT_LIST_HEAD(&cbk->entries.list); LOCK(&fop->lock); @@ -75,50 +71,45 @@ ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this, return cbk; } -void ec_cbk_data_destroy(ec_cbk_data_t * cbk) +void +ec_cbk_data_destroy(ec_cbk_data_t *cbk) { - if (cbk->xdata != NULL) - { + if (cbk->xdata != NULL) { dict_unref(cbk->xdata); } - if (cbk->dict != NULL) - { + if (cbk->dict != NULL) { dict_unref(cbk->dict); } - if (cbk->inode != NULL) - { + if (cbk->inode != NULL) { inode_unref(cbk->inode); } - if (cbk->fd != NULL) - { + if (cbk->fd != NULL) { fd_unref(cbk->fd); } - if (cbk->buffers != NULL) - { + if (cbk->buffers != NULL) { iobref_unref(cbk->buffers); } GF_FREE(cbk->vector); - gf_dirent_free (&cbk->entries); - GF_FREE (cbk->str); + gf_dirent_free(&cbk->entries); + GF_FREE(cbk->str); mem_put(cbk); } -ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this, - int32_t id, uint32_t flags, - uintptr_t target, int32_t minimum, - ec_wind_f wind, ec_handler_f handler, - ec_cbk_t cbks, void * data) +ec_fop_data_t * +ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id, + uint32_t flags, uintptr_t target, int32_t minimum, + ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks, + void *data) { - ec_fop_data_t * fop, * parent; - ec_t * ec = this->private; + ec_fop_data_t *fop, *parent; + ec_t *ec = this->private; fop = mem_get0(ec->fop_pool); - if (fop == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to allocate memory for a " - "request."); + if (fop == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to allocate memory for a " + "request."); return NULL; } @@ -142,19 +133,15 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this, * TODO: minimize usage of private frames. Reuse req_frame as much as * possible. 
*/ - if (frame != NULL) - { + if (frame != NULL) { fop->frame = copy_frame(frame); - } - else - { + } else { fop->frame = create_frame(this, this->ctx->pool); } - if (fop->frame == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to create a private frame " - "for a request"); + if (fop->frame == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to create a private frame " + "for a request"); mem_put(fop); @@ -179,11 +166,9 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this, fop->frame->local = fop; - if (frame != NULL) - { + if (frame != NULL) { parent = frame->local; - if (parent != NULL) - { + if (parent != NULL) { ec_sleep(parent); } @@ -199,7 +184,8 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this, return fop; } -void ec_fop_data_acquire(ec_fop_data_t * fop) +void +ec_fop_data_acquire(ec_fop_data_t *fop) { LOCK(&fop->lock); @@ -211,36 +197,38 @@ void ec_fop_data_acquire(ec_fop_data_t * fop) } static void -ec_handle_last_pending_fop_completion (ec_fop_data_t *fop, gf_boolean_t *notify) +ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify) { - ec_t *ec = fop->xl->private; - - if (!list_empty (&fop->pending_list)) { - LOCK(&ec->lock); - { - list_del_init (&fop->pending_list); - *notify = list_empty (&ec->pending_fops); - } - UNLOCK(&ec->lock); + ec_t *ec = fop->xl->private; + + if (!list_empty(&fop->pending_list)) { + LOCK(&ec->lock); + { + list_del_init(&fop->pending_list); + *notify = list_empty(&ec->pending_fops); } + UNLOCK(&ec->lock); + } } void ec_fop_cleanup(ec_fop_data_t *fop) { - ec_cbk_data_t *cbk, *tmp; + ec_cbk_data_t *cbk, *tmp; - list_for_each_entry_safe(cbk, tmp, &fop->answer_list, answer_list) { - list_del_init(&cbk->answer_list); + list_for_each_entry_safe(cbk, tmp, &fop->answer_list, answer_list) + { + list_del_init(&cbk->answer_list); - ec_cbk_data_destroy(cbk); - } - INIT_LIST_HEAD(&fop->cbk_list); + ec_cbk_data_destroy(cbk); + } + INIT_LIST_HEAD(&fop->cbk_list); - fop->answer = NULL; + fop->answer = NULL; } -void ec_fop_data_release(ec_fop_data_t * fop) +void +ec_fop_data_release(ec_fop_data_t *fop) { ec_t *ec = NULL; int32_t refs; @@ -250,36 +238,30 @@ void ec_fop_data_release(ec_fop_data_t * fop) ec_trace("RELEASE", fop, ""); - GF_ASSERT (fop->refs > 0); + GF_ASSERT(fop->refs > 0); refs = --fop->refs; UNLOCK(&fop->lock); - if (refs == 0) - { + if (refs == 0) { fop->frame->local = NULL; STACK_DESTROY(fop->frame->root); LOCK_DESTROY(&fop->lock); - if (fop->xdata != NULL) - { + if (fop->xdata != NULL) { dict_unref(fop->xdata); } - if (fop->dict != NULL) - { + if (fop->dict != NULL) { dict_unref(fop->dict); } - if (fop->inode != NULL) - { + if (fop->inode != NULL) { inode_unref(fop->inode); } - if (fop->fd != NULL) - { + if (fop->fd != NULL) { fd_unref(fop->fd); } - if (fop->buffers != NULL) - { + if (fop->buffers != NULL) { iobref_unref(fop->buffers); } GF_FREE(fop->vector); @@ -294,8 +276,8 @@ void ec_fop_data_release(ec_fop_data_t * fop) ec_fop_cleanup(fop); ec = fop->xl->private; - ec_handle_last_pending_fop_completion (fop, ¬ify); - ec_handle_healers_done (fop); + ec_handle_last_pending_fop_completion(fop, ¬ify); + ec_handle_healers_done(fop); mem_put(fop); if (notify) { ec_pending_fops_completed(ec); diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index b44bb4239b1..8db92b9d92d 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -25,15 
+25,14 @@ * ***************************************************************/ -int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_opendir(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { - if (dst->fd != src->fd) - { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_FD_MISMATCH, "Mismatching fd in answers " - "of 'GF_FOP_OPENDIR': %p <-> %p", - dst->fd, src->fd); + if (dst->fd != src->fd) { + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_FD_MISMATCH, + "Mismatching fd in answers " + "of 'GF_FOP_OPENDIR': %p <-> %p", + dst->fd, src->fd); return 0; } @@ -41,12 +40,12 @@ int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, fd_t * fd, - dict_t * xdata) +int32_t +ec_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -56,36 +55,31 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPENDIR, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (fd != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (fd != NULL) { cbk->fd = fd_ref(fd); - if (cbk->fd == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + if (cbk->fd == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, + EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -93,19 +87,19 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, ec_combine(cbk, ec_combine_opendir); - ec_update_fd_status (fd, this, idx, op_ret); + ec_update_fd_status(fd, this, idx, op_ret); } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_opendir(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -114,14 +108,14 @@ void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->fd, fop->xdata); } -int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_opendir(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; ec_fd_t *ctx; int32_t err; - switch (state) - { + switch (state) { case EC_STATE_INIT: LOCK(&fop->fd->lock); @@ -182,8 +176,7 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.opendir != NULL) - { + if (fop->cbks.opendir != NULL) { fop->cbks.opendir(fop->req_frame, fop, fop->xl, cbk->op_ret, 
cbk->op_errno, cbk->fd, cbk->xdata); } @@ -197,8 +190,7 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.opendir != NULL) - { + if (fop->cbks.opendir != NULL) { fop->cbks.opendir(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL); } @@ -218,40 +210,39 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_opendir_cbk_t func, void * data, - loc_t * loc, fd_t * fd, dict_t * xdata) +void +ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc, + fd_t *fd, dict_t *xdata) { - ec_cbk_t callback = { .opendir = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.opendir = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(OPENDIR) %p", frame); + gf_msg_trace("ec", 0, "EC(OPENDIR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, - EC_FLAG_LOCK_SHARED, target, minimum, - ec_wind_opendir, ec_manager_opendir, callback, - data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED, + target, minimum, ec_wind_opendir, + ec_manager_opendir, callback, data); if (fop == NULL) { goto out; } if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -259,9 +250,9 @@ void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -269,9 +260,9 @@ void ec_opendir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -289,50 +280,49 @@ out: /* Returns -1 if client_id is invalid else index of child subvol in xl_list */ int -ec_deitransform (xlator_t *this, off_t offset) +ec_deitransform(xlator_t *this, off_t offset) { - int idx = -1; - int client_id = -1; - ec_t *ec = this->private; - char id[32] = {0}; - int err; - - client_id = gf_deitransform (this, offset); - sprintf (id, "%d", client_id); - err = dict_get_int32 (ec->leaf_to_subvolid, id, &idx); - if (err < 0) { - idx = err; - goto out; - } + int idx = -1; + int client_id = -1; + ec_t *ec = this->private; + char id[32] = {0}; + int err; + + client_id = gf_deitransform(this, offset); + sprintf(id, "%d", client_id); + err = 
dict_get_int32(ec->leaf_to_subvolid, id, &idx); + if (err < 0) { + idx = err; + goto out; + } out: - if (idx < 0) { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_INVALID_REQUEST, - "Invalid index %d in readdirp request", client_id); - idx = -EINVAL; - } - return idx; + if (idx < 0) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_REQUEST, + "Invalid index %d in readdirp request", client_id); + idx = -EINVAL; + } + return idx; } /* FOP: readdir */ -void ec_adjust_readdirp (ec_t *ec, int32_t idx, gf_dirent_t *entries) +void +ec_adjust_readdirp(ec_t *ec, int32_t idx, gf_dirent_t *entries) { - gf_dirent_t * entry; + gf_dirent_t *entry; list_for_each_entry(entry, &entries->list, list) { if (!entry->inode) - continue; + continue; - if (entry->d_stat.ia_type == IA_IFREG) - { + if (entry->d_stat.ia_type == IA_IFREG) { if ((entry->dict == NULL) || (ec_dict_del_number(entry->dict, EC_XATTR_SIZE, &entry->d_stat.ia_size) != 0)) { - inode_unref (entry->inode); - entry->inode = NULL; + inode_unref(entry->inode); + entry->inode = NULL; } else { ec_iatt_rebuild(ec, &entry->d_stat, 1, 1); } @@ -341,9 +331,9 @@ void ec_adjust_readdirp (ec_t *ec, int32_t idx, gf_dirent_t *entries) } int32_t -ec_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries, dict_t *xdata) +ec_common_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { ec_fop_data_t *fop = NULL; ec_cbk_data_t *cbk = NULL; @@ -356,30 +346,29 @@ ec_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); - cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, - idx, op_ret, op_errno); + cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret, + op_errno); if (cbk) { if (xdata) - cbk->xdata = dict_ref (xdata); + cbk->xdata = dict_ref(xdata); if (cbk->op_ret >= 0) - list_splice_init (&entries->list, - &cbk->entries.list); - ec_combine (cbk, NULL); + list_splice_init(&entries->list, &cbk->entries.list); + ec_combine(cbk, NULL); } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_readdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_readdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -388,13 +377,13 @@ void ec_wind_readdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->fd, fop->size, fop->offset, fop->xdata); } -int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_readdir(ec_fop_data_t *fop, int32_t state) { ec_fd_t *ctx = NULL; ec_cbk_data_t *cbk = NULL; - switch (state) - { + switch (state) { case EC_STATE_INIT: /* Return error if opendir has not been successfully called on * any subvolume. 
*/ @@ -406,42 +395,40 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state) } if (fop->id == GF_FOP_READDIRP) { - int32_t err; + int32_t err; + if (fop->xdata == NULL) { + fop->xdata = dict_new(); if (fop->xdata == NULL) { - fop->xdata = dict_new(); - if (fop->xdata == NULL) { - fop->error = ENOMEM; + fop->error = ENOMEM; - return EC_STATE_REPORT; - } + return EC_STATE_REPORT; } + } - err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0); - if (err != 0) { - fop->error = -err; + err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0); + if (err != 0) { + fop->error = -err; - return EC_STATE_REPORT; - } + return EC_STATE_REPORT; + } } - if (fop->offset != 0) - { - /* Non-zero offset is irrecoverable error as the offset may not be - * valid on other bricks*/ + if (fop->offset != 0) { + /* Non-zero offset is irrecoverable error as the offset may not + * be valid on other bricks*/ int32_t idx = -1; - idx = ec_deitransform (fop->xl, fop->offset); + idx = ec_deitransform(fop->xl, fop->offset); if (idx < 0) { - fop->error = -idx; - return EC_STATE_REPORT; + fop->error = -idx; + return EC_STATE_REPORT; } fop->mask &= 1ULL << idx; } else { - ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, - LLONG_MAX); - ec_lock(fop); + ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, LLONG_MAX); + ec_lock(fop); } return EC_STATE_DISPATCH; @@ -458,14 +445,14 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state) if ((cbk != NULL) && (cbk->op_ret > 0) && (fop->id == GF_FOP_READDIRP)) { - ec_adjust_readdirp (fop->xl->private, cbk->idx, &cbk->entries); + ec_adjust_readdirp(fop->xl->private, cbk->idx, &cbk->entries); } return EC_STATE_REPORT; case EC_STATE_REPORT: cbk = fop->answer; - GF_ASSERT (cbk); + GF_ASSERT(cbk); if (fop->id == GF_FOP_READDIR) { if (fop->cbks.readdir != NULL) { fop->cbks.readdir(fop->req_frame, fop, fop->xl, cbk->op_ret, @@ -479,9 +466,9 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state) } } if (fop->offset == 0) - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; else - return EC_STATE_END; + return EC_STATE_END; case -EC_STATE_INIT: case -EC_STATE_LOCK: @@ -500,50 +487,49 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state) } } if (fop->offset == 0) - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; else - return EC_STATE_END; + return EC_STATE_END; case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - GF_ASSERT (fop->offset == 0); + GF_ASSERT(fop->offset == 0); ec_lock_reuse(fop); return EC_STATE_UNLOCK; case -EC_STATE_UNLOCK: case EC_STATE_UNLOCK: - GF_ASSERT (fop->offset == 0); + GF_ASSERT(fop->offset == 0); ec_unlock(fop); return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_readdir_cbk_t func, void * data, - fd_t * fd, size_t size, off_t offset, dict_t * xdata) +void +ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) { - ec_cbk_t callback = { .readdir = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.readdir = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(READDIR) %p", frame); + 
gf_msg_trace("ec", 0, "EC(READDIR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, - EC_FLAG_LOCK_SHARED, target, minimum, - ec_wind_readdir, ec_manager_readdir, callback, - data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED, + target, minimum, ec_wind_readdir, + ec_manager_readdir, callback, data); if (fop == NULL) { goto out; } @@ -556,9 +542,9 @@ void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -566,9 +552,9 @@ void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -586,7 +572,8 @@ out: /* FOP: readdirp */ -void ec_wind_readdirp(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -595,24 +582,24 @@ void ec_wind_readdirp(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->fd, fop->size, fop->offset, fop->xdata); } -void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_readdirp_cbk_t func, void * data, - fd_t * fd, size_t size, off_t offset, dict_t * xdata) +void +ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) { - ec_cbk_t callback = { .readdirp = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.readdirp = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(READDIRP) %p", frame); + gf_msg_trace("ec", 0, "EC(READDIRP) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIRP, - EC_FLAG_LOCK_SHARED, target, minimum, - ec_wind_readdirp, ec_manager_readdir, callback, - data); + fop = ec_fop_data_allocate( + frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, minimum, + ec_wind_readdirp, ec_manager_readdir, callback, data); if (fop == NULL) { goto out; } @@ -625,9 +612,9 @@ void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -635,9 +622,9 @@ void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + 
"dictionary."); goto out; } diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c index 7779d4849f3..f5c38e80dd7 100644 --- a/xlators/cluster/ec/src/ec-dir-write.c +++ b/xlators/cluster/ec/src/ec-dir-write.c @@ -20,76 +20,75 @@ #include "ec-fops.h" int -ec_dir_write_cbk (call_frame_t *frame, xlator_t *this, - void *cookie, int op_ret, int op_errno, - struct iatt *poststat, struct iatt *preparent, - struct iatt *postparent, struct iatt *preparent2, - struct iatt *postparent2, dict_t *xdata) +ec_dir_write_cbk(call_frame_t *frame, xlator_t *this, void *cookie, int op_ret, + int op_errno, struct iatt *poststat, struct iatt *preparent, + struct iatt *postparent, struct iatt *preparent2, + struct iatt *postparent2, dict_t *xdata) { - ec_fop_data_t *fop = NULL; - ec_cbk_data_t *cbk = NULL; - int i = 0; - int idx = 0; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; + int i = 0; + int idx = 0; - VALIDATE_OR_GOTO (this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, frame->local, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = frame->local; - idx = (long) cookie; + fop = frame->local; + idx = (long)cookie; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); - cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret, - op_errno); - if (!cbk) - goto out; + cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret, + op_errno); + if (!cbk) + goto out; - if (xdata) - cbk->xdata = dict_ref (xdata); + if (xdata) + cbk->xdata = dict_ref(xdata); - if (op_ret < 0) - goto out; + if (op_ret < 0) + goto out; - if (poststat) - cbk->iatt[i++] = *poststat; + if (poststat) + cbk->iatt[i++] = *poststat; - if (preparent) - cbk->iatt[i++] = *preparent; + if (preparent) + cbk->iatt[i++] = *preparent; - if (postparent) - cbk->iatt[i++] = *postparent; + if (postparent) + cbk->iatt[i++] = *postparent; - if (preparent2) - cbk->iatt[i++] = *preparent2; + if (preparent2) + cbk->iatt[i++] = *preparent2; - if (postparent2) - cbk->iatt[i++] = *postparent2; + if (postparent2) + cbk->iatt[i++] = *postparent2; out: - if (cbk) - ec_combine (cbk, ec_combine_write); + if (cbk) + ec_combine(cbk, ec_combine_write); - if (fop) - ec_complete (fop); - return 0; + if (fop) + ec_complete(fop); + return 0; } /* FOP: create */ -int32_t ec_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +int32_t +ec_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, - buf, preparent, postparent, NULL, NULL, xdata); + return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } -void ec_wind_create(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_create(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -99,7 +98,8 @@ void 
ec_wind_create(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->fd, fop->xdata); } -int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_create(ec_fop_data_t *fop, int32_t state) { ec_config_t config; ec_t *ec; @@ -108,8 +108,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) uint64_t version[2] = {0, 0}; int32_t err; - switch (state) - { + switch (state) { case EC_STATE_INIT: LOCK(&fop->fd->lock); @@ -176,7 +175,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) * need to remove O_APPEND from flags (if present) */ fop->int32 &= ~O_APPEND; - /* Fall through */ + /* Fall through */ case EC_STATE_LOCK: ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, @@ -218,12 +217,11 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.create != NULL) - { - fop->cbks.create (fop->req_frame, fop, fop->xl, cbk->op_ret, - cbk->op_errno, fop->fd, fop->loc[0].inode, - &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], - cbk->xdata); + if (fop->cbks.create != NULL) { + fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, fop->fd, fop->loc[0].inode, + &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], + cbk->xdata); } return EC_STATE_LOCK_REUSE; @@ -235,8 +233,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.create != NULL) - { + if (fop->cbks.create != NULL) { fop->cbks.create(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL, NULL, NULL, NULL); } @@ -256,24 +253,23 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_create_cbk_t func, void * data, - loc_t * loc, int32_t flags, mode_t mode, mode_t umask, - fd_t * fd, dict_t * xdata) +void +ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - ec_cbk_t callback = { .create = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.create = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(CREATE) %p", frame); + gf_msg_trace("ec", 0, "EC(CREATE) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -292,8 +288,8 @@ void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -301,19 +297,19 @@ void ec_create(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } } if (xdata != NULL) { - fop->xdata = 
dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -331,16 +327,17 @@ out: /* FOP: link */ -int32_t ec_link_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, inode_t * inode, - struct iatt * buf, struct iatt * preparent, - struct iatt * postparent, dict_t * xdata) +int32_t +ec_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, - buf, preparent, postparent, NULL, NULL, xdata); + return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } -void ec_wind_link(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_link(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -349,17 +346,17 @@ void ec_wind_link(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], &fop->loc[1], fop->xdata); } -int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_link(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[1], &fop->loc[0], - EC_UPDATE_DATA | EC_UPDATE_META | - EC_INODE_SIZE); + ec_lock_prepare_parent_inode( + fop, &fop->loc[1], &fop->loc[0], + EC_UPDATE_DATA | EC_UPDATE_META | EC_INODE_SIZE); ec_lock(fop); return EC_STATE_DISPATCH; @@ -392,8 +389,7 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.link != NULL) - { + if (fop->cbks.link != NULL) { fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); @@ -408,8 +404,7 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.link != NULL) - { + if (fop->cbks.link != NULL) { fop->cbks.link(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL, NULL, NULL); } @@ -429,23 +424,23 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_link_cbk_t func, void * data, loc_t * oldloc, - loc_t * newloc, dict_t * xdata) +void +ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - ec_cbk_t callback = { .link = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.link = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(LINK) %p", frame); + gf_msg_trace("ec", 0, "EC(LINK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ 
-459,26 +454,26 @@ void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target, if (oldloc != NULL) { if (loc_copy(&fop->loc[0], oldloc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (newloc != NULL) { if (loc_copy(&fop->loc[1], newloc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -496,16 +491,17 @@ out: /* FOP: mkdir */ -int32_t ec_mkdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, inode_t * inode, - struct iatt * buf, struct iatt * preparent, - struct iatt * postparent, dict_t * xdata) +int32_t +ec_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, - buf, preparent, postparent, NULL, NULL, xdata); + return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } -void ec_wind_mkdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_mkdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -514,14 +510,14 @@ void ec_wind_mkdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->mode[0], fop->mode[1], fop->xdata); } -int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_mkdir(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; uint64_t version[2] = {0, 0}; int32_t err; - switch (state) - { + switch (state) { case EC_STATE_INIT: if (fop->xdata == NULL) { fop->xdata = dict_new(); @@ -539,7 +535,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) return EC_STATE_REPORT; } - /* Fall through */ + /* Fall through */ case EC_STATE_LOCK: ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, @@ -572,8 +568,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.mkdir != NULL) - { + if (fop->cbks.mkdir != NULL) { fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); @@ -589,8 +584,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) cbk = fop->answer; GF_ASSERT(fop->error != 0); - if (fop->cbks.mkdir != NULL) - { + if (fop->cbks.mkdir != NULL) { fop->cbks.mkdir(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL, NULL, ((cbk) ? 
cbk->xdata : NULL)); @@ -611,31 +605,30 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_mkdir_cbk_t func, void * data, loc_t * loc, - mode_t mode, mode_t umask, dict_t * xdata) +void +ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - ec_cbk_t callback = { .mkdir = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.mkdir = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(MKDIR) %p", frame); + gf_msg_trace("ec", 0, "EC(MKDIR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum, - ec_wind_mkdir, ec_manager_mkdir, callback, - data); + ec_wind_mkdir, ec_manager_mkdir, callback, data); if (fop == NULL) { goto out; } @@ -645,18 +638,18 @@ void ec_mkdir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -674,16 +667,17 @@ out: /* FOP: mknod */ -int32_t ec_mknod_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, inode_t * inode, - struct iatt * buf, struct iatt * preparent, - struct iatt * postparent, dict_t * xdata) +int32_t +ec_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, - buf, preparent, postparent, NULL, NULL, xdata); + return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } -void ec_wind_mknod(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_mknod(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -693,15 +687,15 @@ void ec_wind_mknod(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->xdata); } -int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_mknod(ec_fop_data_t *fop, int32_t state) { ec_config_t config; ec_t *ec; - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; uint64_t version[2] = {0, 0}; - switch (state) - { + switch (state) { case EC_STATE_INIT: if (S_ISREG(fop->mode[0])) { int32_t err; @@ -745,7 +739,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) } } - /* Fall through */ + /* Fall through 
*/ case EC_STATE_LOCK: ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, @@ -778,8 +772,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.mknod != NULL) - { + if (fop->cbks.mknod != NULL) { fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); @@ -794,8 +787,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.mknod != NULL) - { + if (fop->cbks.mknod != NULL) { fop->cbks.mknod(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL, NULL, NULL); } @@ -815,31 +807,30 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_mknod_cbk_t func, void * data, loc_t * loc, - mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata) +void +ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev, + mode_t umask, dict_t *xdata) { - ec_cbk_t callback = { .mknod = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.mknod = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(MKNOD) %p", frame); + gf_msg_trace("ec", 0, "EC(MKNOD) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum, - ec_wind_mknod, ec_manager_mknod, callback, - data); + ec_wind_mknod, ec_manager_mknod, callback, data); if (fop == NULL) { goto out; } @@ -850,18 +841,18 @@ void ec_mknod(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -879,18 +870,19 @@ out: /* FOP: rename */ -int32_t ec_rename_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct iatt * buf, - struct iatt * preoldparent, struct iatt * postoldparent, - struct iatt * prenewparent, struct iatt * postnewparent, - dict_t * xdata) +int32_t +ec_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, struct iatt *preoldparent, + struct iatt *postoldparent, struct iatt *prenewparent, + struct iatt *postnewparent, dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, - buf, preoldparent, postoldparent, prenewparent, - postnewparent, xdata); + return 
ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf, + preoldparent, postoldparent, prenewparent, + postnewparent, xdata); } -void ec_wind_rename(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_rename(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -899,17 +891,17 @@ void ec_wind_rename(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], &fop->loc[1], fop->xdata); } -int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_rename(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], &fop->loc[0], - EC_UPDATE_DATA | EC_UPDATE_META | - EC_INODE_SIZE); + ec_lock_prepare_parent_inode( + fop, &fop->loc[0], &fop->loc[0], + EC_UPDATE_DATA | EC_UPDATE_META | EC_INODE_SIZE); ec_lock_prepare_parent_inode(fop, &fop->loc[1], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); @@ -938,8 +930,7 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.rename != NULL) - { + if (fop->cbks.rename != NULL) { fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4], @@ -955,8 +946,7 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.rename != NULL) - { + if (fop->cbks.rename != NULL) { fop->cbks.rename(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL, NULL, NULL, NULL); } @@ -976,23 +966,23 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_rename_cbk_t func, void * data, - loc_t * oldloc, loc_t * newloc, dict_t * xdata) +void +ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { - ec_cbk_t callback = { .rename = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.rename = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(RENAME) %p", frame); + gf_msg_trace("ec", 0, "EC(RENAME) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1007,26 +997,26 @@ void ec_rename(call_frame_t * frame, xlator_t * this, uintptr_t target, if (oldloc != NULL) { if (loc_copy(&fop->loc[0], oldloc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (newloc != NULL) { if (loc_copy(&fop->loc[1], newloc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg 
(this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1044,15 +1034,17 @@ out: /* FOP: rmdir */ -int32_t ec_rmdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct iatt * preparent, - struct iatt * postparent, dict_t * xdata) +int32_t +ec_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, NULL, - preparent, postparent, NULL, NULL, xdata); + return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, + preparent, postparent, NULL, NULL, xdata); } -void ec_wind_rmdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_rmdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1061,12 +1053,12 @@ void ec_wind_rmdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->int32, fop->xdata); } -int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_rmdir(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, @@ -1090,8 +1082,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.rmdir != NULL) - { + if (fop->cbks.rmdir != NULL) { fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); @@ -1106,8 +1097,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.rmdir != NULL) - { + if (fop->cbks.rmdir != NULL) { fop->cbks.rmdir(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } @@ -1127,31 +1117,30 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_rmdir_cbk_t func, void * data, loc_t * loc, - int xflags, dict_t * xdata) +void +ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags, + dict_t *xdata) { - ec_cbk_t callback = { .rmdir = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.rmdir = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(RMDIR) %p", frame); + gf_msg_trace("ec", 0, "EC(RMDIR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum, - ec_wind_rmdir, ec_manager_rmdir, callback, - data); + ec_wind_rmdir, ec_manager_rmdir, callback, data); if (fop == NULL) { goto out; } @@ -1160,18 +1149,18 @@ void ec_rmdir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, 
GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1189,16 +1178,18 @@ out: /* FOP: symlink */ -int32_t ec_symlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, inode_t * inode, - struct iatt * buf, struct iatt * preparent, - struct iatt * postparent, dict_t * xdata) +int32_t +ec_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, - buf, preparent, postparent, NULL, NULL, xdata); + return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf, + preparent, postparent, NULL, NULL, xdata); } -void ec_wind_symlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_symlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1207,12 +1198,12 @@ void ec_wind_symlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->str[0], &fop->loc[0], fop->mode[0], fop->xdata); } -int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_symlink(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, @@ -1245,8 +1236,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.symlink != NULL) - { + if (fop->cbks.symlink != NULL) { fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], @@ -1262,8 +1252,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.symlink != NULL) - { + if (fop->cbks.symlink != NULL) { fop->cbks.symlink(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL, NULL, NULL); } @@ -1283,24 +1272,23 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_symlink_cbk_t func, void * data, - const char * linkname, loc_t * loc, mode_t umask, - dict_t * xdata) +void +ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_symlink_cbk_t func, void *data, + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { - ec_cbk_t callback = { .symlink = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.symlink = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(SYMLINK) %p", frame); + 
gf_msg_trace("ec", 0, "EC(SYMLINK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1318,16 +1306,16 @@ void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target, if (linkname != NULL) { fop->str[0] = gf_strdup(linkname); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } } if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -1335,9 +1323,9 @@ void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1355,16 +1343,17 @@ out: /* FOP: unlink */ -int32_t ec_unlink_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, - struct iatt * preparent, struct iatt * postparent, - dict_t * xdata) +int32_t +ec_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - return ec_dir_write_cbk (frame, this, cookie, op_ret, op_errno, NULL, - preparent, postparent, NULL, NULL, xdata); + return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, + preparent, postparent, NULL, NULL, xdata); } -void ec_wind_unlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_unlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1373,12 +1362,12 @@ void ec_wind_unlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->int32, fop->xdata); } -int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_unlink(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, @@ -1402,8 +1391,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.unlink != NULL) - { + if (fop->cbks.unlink != NULL) { fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); @@ -1418,8 +1406,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.unlink != NULL) - { + if (fop->cbks.unlink != NULL) { fop->cbks.unlink(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } @@ -1439,23 +1426,23 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_unlink_cbk_t func, 
void * data, - loc_t * loc, int xflags, dict_t * xdata) +void +ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc, + int xflags, dict_t *xdata) { - ec_cbk_t callback = { .unlink = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.unlink = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(UNLINK) %p", frame); + gf_msg_trace("ec", 0, "EC(UNLINK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1472,18 +1459,18 @@ void ec_unlink(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c index a5f986e74f4..d12481bdc18 100644 --- a/xlators/cluster/ec/src/ec-generic.c +++ b/xlators/cluster/ec/src/ec-generic.c @@ -22,11 +22,12 @@ /* FOP: flush */ -int32_t ec_flush_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -36,21 +37,18 @@ int32_t ec_flush_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FLUSH, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -60,15 +58,15 @@ int32_t ec_flush_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_flush(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_flush(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -77,12 +75,12 @@ void ec_wind_flush(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->xdata); } -int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_flush(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: ec_lock_prepare_fd(fop, fop->fd, 0, 0, LLONG_MAX); @@ -110,8 +108,7 @@ 
int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.flush != NULL) - { + if (fop->cbks.flush != NULL) { fop->cbks.flush(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->xdata); } @@ -126,8 +123,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.flush != NULL) - { + if (fop->cbks.flush != NULL) { fop->cbks.flush(fop->req_frame, fop, fop->xl, -1, fop->error, NULL); } @@ -147,31 +143,29 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_flush_cbk_t func, void * data, fd_t * fd, - dict_t * xdata) +void +ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata) { - ec_cbk_t callback = { .flush = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.flush = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FLUSH) %p", frame); + gf_msg_trace("ec", 0, "EC(FLUSH) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum, - ec_wind_flush, ec_manager_flush, callback, - data); + ec_wind_flush, ec_manager_flush, callback, data); if (fop == NULL) { goto out; } @@ -181,9 +175,9 @@ void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -191,9 +185,9 @@ void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -211,13 +205,13 @@ out: /* FOP: fsync */ -int32_t ec_combine_fsync(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_fsync(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_IATT_MISMATCH, "Mismatching iatt in " - "answers of 'GF_FOP_FSYNC'"); + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH, + "Mismatching iatt in " + "answers of 'GF_FOP_FSYNC'"); return 0; } @@ -225,12 +219,13 @@ int32_t ec_combine_fsync(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_fsync_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct iatt * prebuf, - struct iatt * postbuf, dict_t * xdata) +int32_t +ec_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, struct 
iatt *postbuf, + dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -240,32 +235,26 @@ int32_t ec_fsync_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNC, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (prebuf != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (prebuf != NULL) { cbk->iatt[0] = *prebuf; } - if (postbuf != NULL) - { + if (postbuf != NULL) { cbk->iatt[1] = *postbuf; } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -275,15 +264,15 @@ int32_t ec_fsync_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_fsync(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fsync(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -292,12 +281,12 @@ void ec_wind_fsync(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->int32, fop->xdata); } -int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_fsync(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, LLONG_MAX); @@ -318,8 +307,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state) case EC_STATE_PREPARE_ANSWER: cbk = ec_fop_prepare_answer(fop, _gf_false); if (cbk != NULL) { - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, - cbk->count); + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count); /* This shouldn't fail because we have the inode locked. 
*/ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, @@ -334,8 +322,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.fsync != NULL) - { + if (fop->cbks.fsync != NULL) { fop->cbks.fsync(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); @@ -351,8 +338,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state) case -EC_STATE_DELAYED_START: GF_ASSERT(fop->error != 0); - if (fop->cbks.fsync != NULL) - { + if (fop->cbks.fsync != NULL) { fop->cbks.fsync(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } @@ -372,31 +358,30 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_fsync_cbk_t func, void * data, fd_t * fd, - int32_t datasync, dict_t * xdata) +void +ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync, + dict_t *xdata) { - ec_cbk_t callback = { .fsync = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fsync = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FSYNC) %p", frame); + gf_msg_trace("ec", 0, "EC(FSYNC) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum, - ec_wind_fsync, ec_manager_fsync, callback, - data); + ec_wind_fsync, ec_manager_fsync, callback, data); if (fop == NULL) { goto out; } @@ -408,9 +393,9 @@ void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -418,9 +403,9 @@ void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -438,11 +423,12 @@ out: /* FOP: fsyncdir */ -int32_t ec_fsyncdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -452,21 +438,18 @@ int32_t ec_fsyncdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, 
frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNCDIR, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -476,15 +459,15 @@ int32_t ec_fsyncdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_fsyncdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fsyncdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -493,12 +476,12 @@ void ec_wind_fsyncdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->fd, fop->int32, fop->xdata); } -int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: ec_lock_prepare_fd(fop, fop->fd, 0, 0, LLONG_MAX); @@ -526,8 +509,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.fsyncdir != NULL) - { + if (fop->cbks.fsyncdir != NULL) { fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->xdata); } @@ -542,10 +524,9 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state) case -EC_STATE_DELAYED_START: GF_ASSERT(fop->error != 0); - if (fop->cbks.fsyncdir != NULL) - { - fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, -1, - fop->error, NULL); + if (fop->cbks.fsyncdir != NULL) { + fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, -1, fop->error, + NULL); } return EC_STATE_LOCK_REUSE; @@ -563,31 +544,31 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_fsyncdir_cbk_t func, void * data, - fd_t * fd, int32_t datasync, dict_t * xdata) +void +ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, + int32_t datasync, dict_t *xdata) { - ec_cbk_t callback = { .fsyncdir = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fsyncdir = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FSYNCDIR) %p", frame); + gf_msg_trace("ec", 0, "EC(FSYNCDIR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, - minimum, ec_wind_fsyncdir, ec_manager_fsyncdir, - callback, data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, minimum, + ec_wind_fsyncdir, ec_manager_fsyncdir, callback, + data); if (fop == NULL) { goto out; } @@ -599,9 +580,9 @@ void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != 
NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -609,9 +590,9 @@ void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -629,9 +610,10 @@ out: /* FOP: lookup */ -void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) +void +ec_lookup_rebuild(ec_t *ec, ec_fop_data_t *fop, ec_cbk_data_t *cbk) { - ec_inode_t * ctx = NULL; + ec_inode_t *ctx = NULL; uint64_t size = 0; int32_t have_size = 0, err; @@ -650,8 +632,7 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) LOCK(&cbk->inode->lock); ctx = __ec_inode_get(cbk->inode, fop->xl); - if (ctx != NULL) - { + if (ctx != NULL) { if (ctx->have_version) { cbk->version[0] = ctx->post_version[0]; cbk->version[1] = ctx->post_version[1]; @@ -664,24 +645,22 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) UNLOCK(&cbk->inode->lock); - if (cbk->iatt[0].ia_type == IA_IFREG) - { + if (cbk->iatt[0].ia_type == IA_IFREG) { cbk->size = cbk->iatt[0].ia_size; ec_dict_del_number(cbk->xdata, EC_XATTR_SIZE, &cbk->iatt[0].ia_size); - if (have_size) - { + if (have_size) { cbk->iatt[0].ia_size = size; } } } -int32_t ec_combine_lookup(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_lookup(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_IATT_MISMATCH, "Mismatching iatt in " - "answers of 'GF_FOP_LOOKUP'"); + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH, + "Mismatching iatt in " + "answers of 'GF_FOP_LOOKUP'"); return 0; } @@ -689,15 +668,15 @@ int32_t ec_combine_lookup(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, inode_t * inode, - struct iatt * buf, dict_t * xdata, - struct iatt * postparent) +int32_t +ec_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; - uint64_t dirty[2] = {0}; + uint64_t dirty[2] = {0}; VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -706,63 +685,54 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LOOKUP, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (inode != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (inode != NULL) { cbk->inode = inode_ref(inode); - if (cbk->inode == NULL) - 
{ - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_INODE_REF_FAIL, - "Failed to reference an inode."); + if (cbk->inode == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_INODE_REF_FAIL, + "Failed to reference an inode."); goto out; } } - if (buf != NULL) - { + if (buf != NULL) { cbk->iatt[0] = *buf; } - if (postparent != NULL) - { + if (postparent != NULL) { cbk->iatt[1] = *postparent; } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } - ec_dict_del_array (xdata, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); + ec_dict_del_array(xdata, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); } ec_combine(cbk, ec_combine_lookup); } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_lookup(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_lookup(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -771,20 +741,21 @@ void ec_wind_lookup(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->xdata); } -int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_lookup(ec_fop_data_t *fop, int32_t state) { ec_cbk_data_t *cbk; int32_t err; - switch (state) - { + switch (state) { case EC_STATE_INIT: if (fop->xdata == NULL) { fop->xdata = dict_new(); if (fop->xdata == NULL) { - gf_msg (fop->xl->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOOKUP_REQ_PREP_FAIL, "Unable to prepare " - "lookup request"); + gf_msg(fop->xl->name, GF_LOG_ERROR, ENOMEM, + EC_MSG_LOOKUP_REQ_PREP_FAIL, + "Unable to prepare " + "lookup request"); fop->error = ENOMEM; @@ -792,7 +763,7 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) } } else { /*TODO: To be handled once we have 'syndromes' */ - dict_del (fop->xdata, GF_CONTENT_KEY); + dict_del(fop->xdata, GF_CONTENT_KEY); } err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0); if (err == 0) { @@ -802,16 +773,17 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) err = dict_set_uint64(fop->xdata, EC_XATTR_DIRTY, 0); } if (err != 0) { - gf_msg (fop->xl->name, GF_LOG_ERROR, -err, - EC_MSG_LOOKUP_REQ_PREP_FAIL, "Unable to prepare lookup " - "request"); + gf_msg(fop->xl->name, GF_LOG_ERROR, -err, + EC_MSG_LOOKUP_REQ_PREP_FAIL, + "Unable to prepare lookup " + "request"); fop->error = -err; return EC_STATE_REPORT; } - /* Fall through */ + /* Fall through */ case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -827,8 +799,8 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) */ if (!fop->answer && !list_empty(&fop->cbk_list)) { - fop->answer = list_entry (fop->cbk_list.next, ec_cbk_data_t, - list); + fop->answer = list_entry(fop->cbk_list.next, ec_cbk_data_t, + list); } cbk = ec_fop_prepare_answer(fop, _gf_true); @@ -845,8 +817,7 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.lookup != NULL) - { + if (fop->cbks.lookup != NULL) { fop->cbks.lookup(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->inode, &cbk->iatt[0], cbk->xdata, &cbk->iatt[1]); @@ -860,8 +831,7 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.lookup != NULL) - { + if (fop->cbks.lookup != NULL) { fop->cbks.lookup(fop->req_frame, fop, 
fop->xl, -1, fop->error, NULL, NULL, NULL, NULL); } @@ -869,23 +839,23 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_lookup(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_lookup_cbk_t func, void * data, - loc_t * loc, dict_t * xdata) +void +ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc, + dict_t *xdata) { - ec_cbk_t callback = { .lookup = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.lookup = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(LOOKUP) %p", frame); + gf_msg_trace("ec", 0, "EC(LOOKUP) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -900,14 +870,14 @@ void ec_lookup(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); /* Do not log failures here as a memory problem would have already * been logged by the corresponding alloc functions */ if (fop->xdata == NULL) @@ -926,20 +896,20 @@ out: /* FOP: statfs */ -int32_t ec_combine_statfs(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_statfs(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { ec_statvfs_combine(&dst->statvfs, &src->statvfs); return 1; } -int32_t ec_statfs_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct statvfs * buf, - dict_t * xdata) +int32_t +ec_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct statvfs *buf, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -949,28 +919,23 @@ int32_t ec_statfs_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STATFS, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (buf != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (buf != NULL) { cbk->statvfs = *buf; } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -980,15 +945,15 @@ int32_t ec_statfs_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != 
NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_statfs(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_statfs(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -997,14 +962,14 @@ void ec_wind_statfs(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->xdata); } -int32_t ec_manager_statfs(ec_fop_data_t *fop, int32_t state) +int32_t +ec_manager_statfs(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t *cbk = NULL; - gf_boolean_t deem_statfs_enabled = _gf_false; - int32_t err = 0; + ec_cbk_data_t *cbk = NULL; + gf_boolean_t deem_statfs_enabled = _gf_false; + int32_t err = 0; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -1017,8 +982,8 @@ int32_t ec_manager_statfs(ec_fop_data_t *fop, int32_t state) ec_t *ec = fop->xl->private; if (cbk->xdata) { - err = dict_get_int8 (cbk->xdata, "quota-deem-statfs", - (int8_t *)&deem_statfs_enabled); + err = dict_get_int8(cbk->xdata, "quota-deem-statfs", + (int8_t *)&deem_statfs_enabled); if (err != -ENOENT) { ec_cbk_set_error(cbk, -err, _gf_true); } @@ -1038,8 +1003,7 @@ int32_t ec_manager_statfs(ec_fop_data_t *fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.statfs != NULL) - { + if (fop->cbks.statfs != NULL) { fop->cbks.statfs(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->statvfs, cbk->xdata); } @@ -1052,8 +1016,7 @@ int32_t ec_manager_statfs(ec_fop_data_t *fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.statfs != NULL) - { + if (fop->cbks.statfs != NULL) { fop->cbks.statfs(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL); } @@ -1061,23 +1024,23 @@ int32_t ec_manager_statfs(ec_fop_data_t *fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_statfs_cbk_t func, void * data, - loc_t * loc, dict_t * xdata) +void +ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc, + dict_t *xdata) { - ec_cbk_t callback = { .statfs = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.statfs = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(STATFS) %p", frame); + gf_msg_trace("ec", 0, "EC(STATFS) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1092,8 +1055,8 @@ void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -1101,9 +1064,9 @@ void ec_statfs(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1121,14 +1084,13 @@ 
out: /* FOP: xattrop */ -int32_t ec_combine_xattrop(ec_fop_data_t *fop, ec_cbk_data_t *dst, - ec_cbk_data_t *src) +int32_t +ec_combine_xattrop(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { - if (!ec_dict_compare(dst->dict, src->dict)) - { - gf_msg (fop->xl->name, GF_LOG_DEBUG, 0, - EC_MSG_DICT_MISMATCH, "Mismatching dictionary in " - "answers of 'GF_FOP_XATTROP'"); + if (!ec_dict_compare(dst->dict, src->dict)) { + gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_DICT_MISMATCH, + "Mismatching dictionary in " + "answers of 'GF_FOP_XATTROP'"); return 0; } @@ -1137,72 +1099,71 @@ int32_t ec_combine_xattrop(ec_fop_data_t *fop, ec_cbk_data_t *dst, } int32_t -ec_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr, - dict_t *xdata) +ec_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata) { - ec_fop_data_t *fop = NULL; - ec_lock_link_t *link = NULL; - ec_cbk_data_t *cbk = NULL; - uint64_t dirty[2] = {0}; - data_t *data; - uint64_t *version; - int32_t idx = (int32_t)(uintptr_t)cookie; - - VALIDATE_OR_GOTO (this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, frame->local, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - fop = frame->local; - - ec_trace ("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); - - cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret, - op_errno); - if (!cbk) - goto out; + ec_fop_data_t *fop = NULL; + ec_lock_link_t *link = NULL; + ec_cbk_data_t *cbk = NULL; + uint64_t dirty[2] = {0}; + data_t *data; + uint64_t *version; + int32_t idx = (int32_t)(uintptr_t)cookie; - if (op_ret >= 0) { - cbk->dict = dict_ref (xattr); + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - data = dict_get(cbk->dict, EC_XATTR_VERSION); - if ((data != NULL) && (data->len >= sizeof(uint64_t))) { - version = (uint64_t *)data->data; + fop = frame->local; - if (((ntoh64(version[0]) >> EC_SELFHEAL_BIT) & 1) != 0) { - LOCK(&fop->lock); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); - fop->healing |= 1ULL << idx; + cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret, + op_errno); + if (!cbk) + goto out; - UNLOCK(&fop->lock); - } - } + if (op_ret >= 0) { + cbk->dict = dict_ref(xattr); - ec_dict_del_array (xattr, EC_XATTR_DIRTY, dirty, - EC_VERSION_SIZE); - link = fop->data; - if (link) { - /*Keep a note of if the dirty is already set or not*/ - link->dirty[0] |= (dirty[0] != 0); - link->dirty[1] |= (dirty[1] != 0); - } + data = dict_get(cbk->dict, EC_XATTR_VERSION); + if ((data != NULL) && (data->len >= sizeof(uint64_t))) { + version = (uint64_t *)data->data; + + if (((ntoh64(version[0]) >> EC_SELFHEAL_BIT) & 1) != 0) { + LOCK(&fop->lock); + + fop->healing |= 1ULL << idx; + + UNLOCK(&fop->lock); + } + } + + ec_dict_del_array(xattr, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); + link = fop->data; + if (link) { + /*Keep a note of if the dirty is already set or not*/ + link->dirty[0] |= (dirty[0] != 0); + link->dirty[1] |= (dirty[1] != 0); } + } - if (xdata) - cbk->xdata = dict_ref(xdata); + if (xdata) + cbk->xdata = dict_ref(xdata); - ec_combine (cbk, ec_combine_xattrop); + ec_combine(cbk, ec_combine_xattrop); out: - if (fop) - ec_complete(fop); + if 
(fop) + ec_complete(fop); - return 0; + return 0; } -void ec_wind_xattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_xattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1211,20 +1172,19 @@ void ec_wind_xattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->xattrop_flags, fop->dict, fop->xdata); } -int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_xattrop(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: if (fop->fd == NULL) { ec_lock_prepare_inode(fop, &fop->loc[0], EC_UPDATE_META, 0, LLONG_MAX); } else { - ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META, 0, - LLONG_MAX); + ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META, 0, LLONG_MAX); } ec_lock(fop); @@ -1251,19 +1211,13 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->id == GF_FOP_XATTROP) - { - if (fop->cbks.xattrop != NULL) - { - fop->cbks.xattrop(fop->req_frame, fop, fop->xl, - cbk->op_ret, cbk->op_errno, cbk->dict, - cbk->xdata); + if (fop->id == GF_FOP_XATTROP) { + if (fop->cbks.xattrop != NULL) { + fop->cbks.xattrop(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->dict, cbk->xdata); } - } - else - { - if (fop->cbks.fxattrop != NULL) - { + } else { + if (fop->cbks.fxattrop != NULL) { fop->cbks.fxattrop(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->dict, cbk->xdata); @@ -1279,18 +1233,13 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->id == GF_FOP_XATTROP) - { - if (fop->cbks.xattrop != NULL) - { + if (fop->id == GF_FOP_XATTROP) { + if (fop->cbks.xattrop != NULL) { fop->cbks.xattrop(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL); } - } - else - { - if (fop->cbks.fxattrop != NULL) - { + } else { + if (fop->cbks.fxattrop != NULL) { fop->cbks.fxattrop(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL); } @@ -1311,24 +1260,23 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_xattrop_cbk_t func, void * data, - loc_t * loc, gf_xattrop_flags_t optype, dict_t * xattr, - dict_t * xdata) +void +ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - ec_cbk_t callback = { .xattrop = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.xattrop = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(XATTROP) %p", frame); + gf_msg_trace("ec", 0, "EC(XATTROP) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1345,8 +1293,8 @@ void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 
EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -1354,9 +1302,9 @@ void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xattr != NULL) { fop->dict = dict_ref(xattr); if (fop->dict == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1364,9 +1312,9 @@ void ec_xattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1382,7 +1330,8 @@ out: } } -void ec_wind_fxattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1391,24 +1340,24 @@ void ec_wind_fxattrop(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->fd, fop->xattrop_flags, fop->dict, fop->xdata); } -void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_fxattrop_cbk_t func, void * data, - fd_t * fd, gf_xattrop_flags_t optype, dict_t * xattr, - dict_t * xdata) +void +ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - ec_cbk_t callback = { .fxattrop = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fxattrop = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FXATTROP) %p", frame); + gf_msg_trace("ec", 0, "EC(FXATTROP) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, - minimum, ec_wind_fxattrop, ec_manager_xattrop, - callback, data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, minimum, + ec_wind_fxattrop, ec_manager_xattrop, callback, + data); if (fop == NULL) { goto out; } @@ -1420,9 +1369,9 @@ void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -1430,9 +1379,9 @@ void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xattr != NULL) { fop->dict = dict_ref(xattr); if (fop->dict == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1440,9 +1389,9 @@ void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1460,11 +1409,12 @@ out: /* FOP: IPC 
*/ -int32_t ec_ipc_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -1474,16 +1424,14 @@ int32_t ec_ipc_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_IPC, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); } @@ -1491,29 +1439,29 @@ int32_t ec_ipc_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_ipc(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_ipc(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); STACK_WIND_COOKIE(fop->frame, ec_ipc_cbk, (void *)(uintptr_t)idx, - ec->xl_list[idx], ec->xl_list[idx]->fops->ipc, - fop->int32, fop->xdata); + ec->xl_list[idx], ec->xl_list[idx]->fops->ipc, fop->int32, + fop->xdata); } -int32_t ec_manager_ipc(ec_fop_data_t *fop, int32_t state) +int32_t +ec_manager_ipc(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -1529,10 +1477,9 @@ int32_t ec_manager_ipc(ec_fop_data_t *fop, int32_t state) cbk = fop->answer; GF_ASSERT(cbk != NULL); - if (fop->cbks.ipc != NULL) - { + if (fop->cbks.ipc != NULL) { fop->cbks.ipc(fop->req_frame, fop, fop->xl, cbk->op_ret, - cbk->op_errno, cbk->xdata); + cbk->op_errno, cbk->xdata); } return EC_STATE_END; @@ -1543,8 +1490,7 @@ int32_t ec_manager_ipc(ec_fop_data_t *fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.ipc != NULL) - { + if (fop->cbks.ipc != NULL) { fop->cbks.ipc(fop->req_frame, fop, fop->xl, -1, fop->error, NULL); } @@ -1552,23 +1498,22 @@ int32_t ec_manager_ipc(ec_fop_data_t *fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_ipc_cbk_t func, void *data, int32_t op, - dict_t *xdata) +void +ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata) { - ec_cbk_t callback = { .ipc = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.ipc = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(IPC) %p", frame); + gf_msg_trace("ec", 0, "EC(IPC) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); diff --git a/xlators/cluster/ec/src/ec-gf8.c b/xlators/cluster/ec/src/ec-gf8.c index 2665632706b..039adae5929 100644 --- 
a/xlators/cluster/ec/src/ec-gf8.c +++ b/xlators/cluster/ec/src/ec-gf8.c @@ -10,5886 +10,5810 @@ #include "ec-gf8.h" -static ec_gf_op_t ec_gf8_mul_00_ops[] = { - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_00 = { - 0, - { 0, }, - ec_gf8_mul_00_ops -}; - -static ec_gf_op_t ec_gf8_mul_01_ops[] = { - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_01 = { - 8, - { 0, 1, 2, 3, 4, 5, 6, 7, }, - ec_gf8_mul_01_ops -}; - -static ec_gf_op_t ec_gf8_mul_02_ops[] = { - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_02 = { - 8, - { 7, 0, 1, 2, 3, 4, 5, 6, }, - ec_gf8_mul_02_ops -}; +static ec_gf_op_t ec_gf8_mul_00_ops[] = {{EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_00 = {0, + { + 0, + }, + ec_gf8_mul_00_ops}; + +static ec_gf_op_t ec_gf8_mul_01_ops[] = {{EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_01 = {8, + { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_01_ops}; + +static ec_gf_op_t ec_gf8_mul_02_ops[] = {{EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_02 = {8, + { + 7, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + }, + ec_gf8_mul_02_ops}; static ec_gf_op_t ec_gf8_mul_03_ops[] = { - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_COPY, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_03 = { - 9, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, }, - ec_gf8_mul_03_ops -}; + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_COPY, 8, 3, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_03 = {9, + { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + }, + ec_gf8_mul_03_ops}; static ec_gf_op_t ec_gf8_mul_04_ops[] = { - { EC_GF_OP_XOR3, 8, 6, 7 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_04 = { - 9, - { 6, 7, 0, 1, 2, 3, 4, 5, 8, }, - ec_gf8_mul_04_ops -}; + {EC_GF_OP_XOR3, 8, 6, 7}, {EC_GF_OP_XOR2, 2, 8, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_04 = {9, + { + 6, + 7, + 0, + 1, + 2, + 3, + 4, + 5, + 8, + }, + ec_gf8_mul_04_ops}; static ec_gf_op_t ec_gf8_mul_05_ops[] = { - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_05 = { - 8, - { 0, 1, 2, 6, 7, 3, 4, 5, }, - ec_gf8_mul_05_ops -}; + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 1, 
0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_05 = {8, + { + 0, + 1, + 2, + 6, + 7, + 3, + 4, + 5, + }, + ec_gf8_mul_05_ops}; static ec_gf_op_t ec_gf8_mul_06_ops[] = { - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_COPY, 8, 2, 0 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_06 = { - 9, - { 7, 0, 1, 2, 8, 3, 4, 5, 6, }, - ec_gf8_mul_06_ops -}; + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_COPY, 8, 2, 0}, + {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_06 = {9, + { + 7, + 0, + 1, + 2, + 8, + 3, + 4, + 5, + 6, + }, + ec_gf8_mul_06_ops}; static ec_gf_op_t ec_gf8_mul_07_ops[] = { - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_07 = { - 8, - { 6, 0, 1, 3, 2, 4, 5, 7, }, - ec_gf8_mul_07_ops -}; + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_07 = {8, + { + 6, + 0, + 1, + 3, + 2, + 4, + 5, + 7, + }, + ec_gf8_mul_07_ops}; static ec_gf_op_t ec_gf8_mul_08_ops[] = { - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR3, 8, 6, 7 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_08 = { - 9, - { 5, 6, 7, 0, 1, 2, 3, 4, 8, }, - ec_gf8_mul_08_ops -}; + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR3, 8, 6, 7}, + {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_08 = {9, + { + 5, + 6, + 7, + 0, + 1, + 2, + 3, + 4, + 8, + }, + ec_gf8_mul_08_ops}; static ec_gf_op_t ec_gf8_mul_09_ops[] = { - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_09 = { - 8, - { 0, 1, 2, 3, 5, 6, 7, 4, }, - ec_gf8_mul_09_ops -}; + 
{EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_09 = {8, + { + 0, + 1, + 2, + 3, + 5, + 6, + 7, + 4, + }, + ec_gf8_mul_09_ops}; static ec_gf_op_t ec_gf8_mul_0A_ops[] = { - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_0A = { - 8, - { 5, 0, 1, 2, 6, 7, 3, 4, }, - ec_gf8_mul_0A_ops -}; + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_0A = {8, + { + 5, + 0, + 1, + 2, + 6, + 7, + 3, + 4, + }, + ec_gf8_mul_0A_ops}; static ec_gf_op_t ec_gf8_mul_0B_ops[] = { - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_COPY, 9, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_COPY, 8, 5, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR3, 3, 8, 6 }, - { EC_GF_OP_XOR2, 1, 9, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_0B = { - 10, - { 7, 1, 5, 2, 4, 3, 0, 6, 8, 9, }, - ec_gf8_mul_0B_ops -}; + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_COPY, 9, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_COPY, 8, 5, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR3, 3, 8, 6}, {EC_GF_OP_XOR2, 1, 9, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_0B = {10, + { + 7, + 1, + 5, + 2, + 4, + 3, + 0, + 6, + 8, + 9, + }, + ec_gf8_mul_0B_ops}; static ec_gf_op_t ec_gf8_mul_0C_ops[] = { - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 8, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_0C = { - 9, - { 5, 7, 0, 1, 8, 2, 3, 4, 6, }, - ec_gf8_mul_0C_ops -}; + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_COPY, 8, 1, 0}, + {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_END, 
0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_0C = {9, + { + 5, + 7, + 0, + 1, + 8, + 2, + 3, + 4, + 6, + }, + ec_gf8_mul_0C_ops}; static ec_gf_op_t ec_gf8_mul_0D_ops[] = { - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR3, 8, 2, 4 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR3, 2, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_0D = { - 9, - { 5, 6, 7, 3, 1, 0, 2, 4, 8, }, - ec_gf8_mul_0D_ops -}; + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR3, 8, 2, 4}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR3, 2, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_0D = {9, + { + 5, + 6, + 7, + 3, + 1, + 0, + 2, + 4, + 8, + }, + ec_gf8_mul_0D_ops}; static ec_gf_op_t ec_gf8_mul_0E_ops[] = { - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_0E = { - 8, - { 7, 0, 6, 1, 3, 2, 4, 5, }, - ec_gf8_mul_0E_ops -}; + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_0E = {8, + { + 7, + 0, + 6, + 1, + 3, + 2, + 4, + 5, + }, + ec_gf8_mul_0E_ops}; static ec_gf_op_t ec_gf8_mul_0F_ops[] = { - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_0F = { - 8, - { 1, 0, 5, 6, 7, 2, 3, 4, }, - ec_gf8_mul_0F_ops -}; + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_0F = {8, + { + 1, + 0, + 5, + 6, + 7, + 2, + 3, + 4, + }, + ec_gf8_mul_0F_ops}; static ec_gf_op_t 
ec_gf8_mul_10_ops[] = { - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_10 = { - 8, - { 4, 5, 6, 7, 0, 1, 2, 3, }, - ec_gf8_mul_10_ops -}; + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_10 = {8, + { + 4, + 5, + 6, + 7, + 0, + 1, + 2, + 3, + }, + ec_gf8_mul_10_ops}; static ec_gf_op_t ec_gf8_mul_11_ops[] = { - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_11 = { - 8, - { 4, 1, 2, 6, 0, 5, 7, 3, }, - ec_gf8_mul_11_ops -}; + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_11 = {8, + { + 4, + 1, + 2, + 6, + 0, + 5, + 7, + 3, + }, + ec_gf8_mul_11_ops}; static ec_gf_op_t ec_gf8_mul_12_ops[] = { - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_12 = { - 8, - { 7, 0, 1, 2, 3, 5, 6, 4, }, - ec_gf8_mul_12_ops -}; + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_12 = {8, + { + 7, + 0, + 1, + 2, + 3, + 5, + 6, + 4, + }, + ec_gf8_mul_12_ops}; static ec_gf_op_t ec_gf8_mul_13_ops[] = { - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR3, 8, 3, 7 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_13 = { - 9, - { 4, 5, 2, 6, 0, 1, 7, 3, 8, }, - ec_gf8_mul_13_ops -}; + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 8, 3, 
7}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 8, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_13 = {9, + { + 4, + 5, + 2, + 6, + 0, + 1, + 7, + 3, + 8, + }, + ec_gf8_mul_13_ops}; static ec_gf_op_t ec_gf8_mul_14_ops[] = { - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_14 = { - 8, - { 6, 7, 0, 1, 2, 4, 5, 3, }, - ec_gf8_mul_14_ops -}; + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_14 = {8, + { + 6, + 7, + 0, + 1, + 2, + 4, + 5, + 3, + }, + ec_gf8_mul_14_ops}; static ec_gf_op_t ec_gf8_mul_15_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR3, 5, 8, 7 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_15 = { - 9, - { 0, 1, 2, 4, 7, 6, 5, 3, 8, }, - ec_gf8_mul_15_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR3, 5, 8, 7}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_15 = {9, + { + 0, + 1, + 2, + 4, + 7, + 6, + 5, + 3, + 8, + }, + ec_gf8_mul_15_ops}; static ec_gf_op_t ec_gf8_mul_16_ops[] = { - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_16 = { - 8, - { 6, 7, 4, 1, 2, 3, 5, 0, }, - ec_gf8_mul_16_ops -}; + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_16 = {8, + { + 6, + 7, + 4, + 1, + 2, + 3, + 5, + 0, + }, + ec_gf8_mul_16_ops}; static ec_gf_op_t ec_gf8_mul_17_ops[] = { - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 
3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_17 = { - 8, - { 5, 7, 0, 1, 3, 2, 4, 6, }, - ec_gf8_mul_17_ops -}; + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_17 = {8, + { + 5, + 7, + 0, + 1, + 3, + 2, + 4, + 6, + }, + ec_gf8_mul_17_ops}; static ec_gf_op_t ec_gf8_mul_18_ops[] = { - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_18 = { - 9, - { 4, 5, 7, 6, 0, 1, 2, 3, 8, }, - ec_gf8_mul_18_ops -}; + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_18 = {9, + { + 4, + 5, + 7, + 6, + 0, + 1, + 2, + 3, + 8, + }, + ec_gf8_mul_18_ops}; static ec_gf_op_t ec_gf8_mul_19_ops[] = { - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_19 = { - 8, - { 0, 5, 2, 6, 7, 1, 3, 4, }, - ec_gf8_mul_19_ops -}; + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_19 = {8, + { + 0, + 5, + 2, + 6, + 7, + 1, + 3, + 4, + }, + ec_gf8_mul_19_ops}; static ec_gf_op_t ec_gf8_mul_1A_ops[] = { - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { 
EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_1A = { - 8, - { 7, 0, 4, 5, 3, 1, 2, 6, }, - ec_gf8_mul_1A_ops -}; + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_1A = {8, + { + 7, + 0, + 4, + 5, + 3, + 1, + 2, + 6, + }, + ec_gf8_mul_1A_ops}; static ec_gf_op_t ec_gf8_mul_1B_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_1B = { - 8, - { 7, 4, 5, 6, 3, 1, 2, 0, }, - ec_gf8_mul_1B_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_1B = {8, + { + 7, + 4, + 5, + 6, + 3, + 1, + 2, + 0, + }, + ec_gf8_mul_1B_ops}; static ec_gf_op_t ec_gf8_mul_1C_ops[] = { - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_1C = { - 8, - { 5, 4, 3, 0, 1, 7, 2, 6, }, - ec_gf8_mul_1C_ops -}; + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_1C = {8, + { + 5, + 4, + 3, + 0, + 1, + 7, + 2, + 6, + }, + ec_gf8_mul_1C_ops}; static ec_gf_op_t ec_gf8_mul_1D_ops[] = { - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR3, 8, 4, 2 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_1D = { - 9, - { 0, 7, 5, 8, 2, 3, 
4, 1, 6, }, - ec_gf8_mul_1D_ops -}; + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR3, 8, 4, 2}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_1D = {9, + { + 0, + 7, + 5, + 8, + 2, + 3, + 4, + 1, + 6, + }, + ec_gf8_mul_1D_ops}; static ec_gf_op_t ec_gf8_mul_1E_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_1E = { - 8, - { 4, 7, 5, 1, 6, 0, 2, 3, }, - ec_gf8_mul_1E_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_1E = {8, + { + 4, + 7, + 5, + 1, + 6, + 0, + 2, + 3, + }, + ec_gf8_mul_1E_ops}; static ec_gf_op_t ec_gf8_mul_1F_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR3, 8, 3, 7 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_1F = { - 9, - { 1, 4, 5, 6, 7, 0, 3, 2, 8, }, - ec_gf8_mul_1F_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR3, 8, 3, 7}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_1F = {9, + { + 1, + 4, + 5, + 6, + 7, + 0, + 3, + 2, + 8, + }, + ec_gf8_mul_1F_ops}; static ec_gf_op_t ec_gf8_mul_20_ops[] = { - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_20 = { - 8, - { 7, 4, 5, 6, 3, 0, 1, 2, }, - ec_gf8_mul_20_ops -}; + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + 
{EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_20 = {8, + { + 7, + 4, + 5, + 6, + 3, + 0, + 1, + 2, + }, + ec_gf8_mul_20_ops}; static ec_gf_op_t ec_gf8_mul_21_ops[] = { - { EC_GF_OP_COPY, 9, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR3, 8, 7, 5 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 4, 9, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_21 = { - 10, - { 0, 1, 2, 7, 5, 4, 3, 6, 8, 9, }, - ec_gf8_mul_21_ops -}; + {EC_GF_OP_COPY, 9, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR3, 8, 7, 5}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 2, 8, 0}, + {EC_GF_OP_XOR2, 4, 9, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_21 = {10, + { + 0, + 1, + 2, + 7, + 5, + 4, + 3, + 6, + 8, + 9, + }, + ec_gf8_mul_21_ops}; static ec_gf_op_t ec_gf8_mul_22_ops[] = { - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_22 = { - 8, - { 3, 0, 5, 2, 6, 4, 1, 7, }, - ec_gf8_mul_22_ops -}; + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_22 = {8, + { + 3, + 0, + 5, + 2, + 6, + 4, + 1, + 7, + }, + ec_gf8_mul_22_ops}; static ec_gf_op_t ec_gf8_mul_23_ops[] = { - { EC_GF_OP_COPY, 8, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_23 = { - 9, - { 0, 4, 3, 2, 5, 6, 1, 8, 7, }, - ec_gf8_mul_23_ops -}; + {EC_GF_OP_COPY, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 3, 8, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_23 = {9, + { + 0, + 4, + 3, + 2, + 5, + 6, + 1, + 8, + 7, + }, + ec_gf8_mul_23_ops}; static ec_gf_op_t ec_gf8_mul_24_ops[] = { - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { 
EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_24 = { - 8, - { 6, 7, 0, 1, 2, 4, 5, 3, }, - ec_gf8_mul_24_ops -}; + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_24 = {8, + { + 6, + 7, + 0, + 1, + 2, + 4, + 5, + 3, + }, + ec_gf8_mul_24_ops}; static ec_gf_op_t ec_gf8_mul_25_ops[] = { - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_25 = { - 8, - { 2, 7, 0, 1, 3, 4, 5, 6, }, - ec_gf8_mul_25_ops -}; + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_25 = {8, + { + 2, + 7, + 0, + 1, + 3, + 4, + 5, + 6, + }, + ec_gf8_mul_25_ops}; static ec_gf_op_t ec_gf8_mul_26_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_26 = { - 8, - { 3, 4, 1, 2, 0, 5, 6, 7, }, - ec_gf8_mul_26_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_26 = {8, + { + 3, + 4, + 1, + 2, + 0, + 5, + 6, + 7, + }, + ec_gf8_mul_26_ops}; static ec_gf_op_t ec_gf8_mul_27_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_27 = { - 8, - { 3, 0, 1, 2, 6, 7, 4, 5, }, - ec_gf8_mul_27_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, 
{EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_27 = {8, + { + 3, + 0, + 1, + 2, + 6, + 7, + 4, + 5, + }, + ec_gf8_mul_27_ops}; static ec_gf_op_t ec_gf8_mul_28_ops[] = { - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_28 = { - 8, - { 5, 6, 3, 0, 1, 2, 4, 7, }, - ec_gf8_mul_28_ops -}; + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_28 = {8, + { + 5, + 6, + 3, + 0, + 1, + 2, + 4, + 7, + }, + ec_gf8_mul_28_ops}; static ec_gf_op_t ec_gf8_mul_29_ops[] = { - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_29 = { - 8, - { 4, 6, 3, 5, 7, 0, 1, 2, }, - ec_gf8_mul_29_ops -}; + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_29 = {8, + { + 4, + 6, + 3, + 5, + 7, + 0, + 1, + 2, + }, + ec_gf8_mul_29_ops}; static ec_gf_op_t ec_gf8_mul_2A_ops[] = { - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 8, 0, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR3, 6, 8, 4 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_2A = { - 9, - { 3, 4, 7, 2, 6, 5, 1, 0, 8, }, - ec_gf8_mul_2A_ops -}; + {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 0, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR3, 6, 8, 4}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_2A = {9, + { + 3, + 4, + 7, + 2, + 6, + 5, + 1, + 0, + 8, + }, + ec_gf8_mul_2A_ops}; static ec_gf_op_t ec_gf8_mul_2B_ops[] = { - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 5, 
0, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_2B = { - 8, - { 3, 4, 7, 5, 6, 0, 1, 2, }, - ec_gf8_mul_2B_ops -}; + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_2B = {8, + { + 3, + 4, + 7, + 5, + 6, + 0, + 1, + 2, + }, + ec_gf8_mul_2B_ops}; static ec_gf_op_t ec_gf8_mul_2C_ops[] = { - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_2C = { - 8, - { 5, 6, 7, 0, 2, 3, 4, 1, }, - ec_gf8_mul_2C_ops -}; + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_2C = {8, + { + 5, + 6, + 7, + 0, + 2, + 3, + 4, + 1, + }, + ec_gf8_mul_2C_ops}; static ec_gf_op_t ec_gf8_mul_2D_ops[] = { - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR3, 8, 4, 6 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_2D = { - 9, - { 7, 0, 3, 5, 1, 4, 2, 6, 8, }, - ec_gf8_mul_2D_ops -}; + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR3, 8, 4, 6}, + {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_2D = {9, + { + 7, + 0, + 3, + 5, + 1, + 4, + 2, + 6, + 8, + }, + ec_gf8_mul_2D_ops}; static ec_gf_op_t ec_gf8_mul_2E_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_COPY, 8, 4, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 8, 7, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static 
ec_gf_mul_t ec_gf8_mul_2E = { - 9, - { 5, 0, 7, 3, 2, 6, 4, 1, 8, }, - ec_gf8_mul_2E_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_COPY, 8, 4, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_2E = {9, + { + 5, + 0, + 7, + 3, + 2, + 6, + 4, + 1, + 8, + }, + ec_gf8_mul_2E_ops}; static ec_gf_op_t ec_gf8_mul_2F_ops[] = { - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR3, 8, 7, 6 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_2F = { - 9, - { 6, 3, 2, 5, 7, 0, 1, 4, 8, }, - ec_gf8_mul_2F_ops -}; + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR3, 8, 7, 6}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 8, 0}, + {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_2F = {9, + { + 6, + 3, + 2, + 5, + 7, + 0, + 1, + 4, + 8, + }, + ec_gf8_mul_2F_ops}; static ec_gf_op_t ec_gf8_mul_30_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 8, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR3, 6, 8, 7 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_30 = { - 9, - { 3, 4, 7, 5, 0, 6, 1, 2, 8, }, - ec_gf8_mul_30_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR3, 6, 8, 7}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_30 = {9, + { + 3, + 4, + 7, + 5, + 0, + 6, + 1, + 2, + 8, + }, + ec_gf8_mul_30_ops}; static ec_gf_op_t ec_gf8_mul_31_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_31 = { - 8, - { 7, 1, 4, 5, 6, 0, 2, 3, }, - ec_gf8_mul_31_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 
0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_31 = {8, + { + 7, + 1, + 4, + 5, + 6, + 0, + 2, + 3, + }, + ec_gf8_mul_31_ops}; static ec_gf_op_t ec_gf8_mul_32_ops[] = { - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_32 = { - 8, - { 3, 4, 6, 7, 5, 0, 1, 2, }, - ec_gf8_mul_32_ops -}; + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_32 = {8, + { + 3, + 4, + 6, + 7, + 5, + 0, + 1, + 2, + }, + ec_gf8_mul_32_ops}; static ec_gf_op_t ec_gf8_mul_33_ops[] = { - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_33 = { - 8, - { 5, 4, 3, 0, 2, 1, 6, 7, }, - ec_gf8_mul_33_ops -}; + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_33 = {8, + { + 5, + 4, + 3, + 0, + 2, + 1, + 6, + 7, + }, + ec_gf8_mul_33_ops}; static ec_gf_op_t ec_gf8_mul_34_ops[] = { - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_34 = { - 8, - { 7, 5, 3, 0, 2, 4, 1, 6, }, - ec_gf8_mul_34_ops -}; + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 1, 2, 
0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_34 = {8, + { + 7, + 5, + 3, + 0, + 2, + 4, + 1, + 6, + }, + ec_gf8_mul_34_ops}; static ec_gf_op_t ec_gf8_mul_35_ops[] = { - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_35 = { - 8, - { 6, 7, 5, 4, 2, 0, 1, 3, }, - ec_gf8_mul_35_ops -}; + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_35 = {8, + { + 6, + 7, + 5, + 4, + 2, + 0, + 1, + 3, + }, + ec_gf8_mul_35_ops}; static ec_gf_op_t ec_gf8_mul_36_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_36 = { - 8, - { 6, 7, 4, 1, 2, 3, 0, 5, }, - ec_gf8_mul_36_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_36 = {8, + { + 6, + 7, + 4, + 1, + 2, + 3, + 0, + 5, + }, + ec_gf8_mul_36_ops}; static ec_gf_op_t ec_gf8_mul_37_ops[] = { - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR3, 8, 0, 1 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_37 = { - 9, - { 6, 7, 2, 1, 0, 3, 4, 5, 8, }, - ec_gf8_mul_37_ops -}; + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR3, 8, 0, 1}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_37 = {9, + { + 6, + 7, + 2, + 1, + 0, + 3, + 4, + 5, + 8, + }, + ec_gf8_mul_37_ops}; static ec_gf_op_t ec_gf8_mul_38_ops[] = { - { EC_GF_OP_XOR2, 6, 4, 
0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR3, 8, 6, 7 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_38 = { - 9, - { 4, 5, 6, 3, 0, 1, 7, 2, 8, }, - ec_gf8_mul_38_ops -}; + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR3, 8, 6, 7}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 4, 8, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_38 = {9, + { + 4, + 5, + 6, + 3, + 0, + 1, + 7, + 2, + 8, + }, + ec_gf8_mul_38_ops}; static ec_gf_op_t ec_gf8_mul_39_ops[] = { - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_39 = { - 8, - { 1, 6, 3, 0, 5, 2, 4, 7, }, - ec_gf8_mul_39_ops -}; + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_39 = {8, + { + 1, + 6, + 3, + 0, + 5, + 2, + 4, + 7, + }, + ec_gf8_mul_39_ops}; static ec_gf_op_t ec_gf8_mul_3A_ops[] = { - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_3A = { - 8, - { 3, 4, 7, 0, 5, 6, 1, 2, }, - ec_gf8_mul_3A_ops -}; + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_3A = {8, + { + 3, + 4, + 7, + 0, + 5, + 6, + 1, + 2, + }, + ec_gf8_mul_3A_ops}; static ec_gf_op_t ec_gf8_mul_3B_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR3, 8, 7, 3 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { 
EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_3B = { - 9, - { 3, 0, 1, 7, 6, 2, 4, 8, 5, }, - ec_gf8_mul_3B_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR3, 8, 7, 3}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_3B = {9, + { + 3, + 0, + 1, + 7, + 6, + 2, + 4, + 8, + 5, + }, + ec_gf8_mul_3B_ops}; static ec_gf_op_t ec_gf8_mul_3C_ops[] = { - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_3C = { - 8, - { 3, 6, 4, 1, 7, 2, 0, 5, }, - ec_gf8_mul_3C_ops -}; + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_3C = {8, + { + 3, + 6, + 4, + 1, + 7, + 2, + 0, + 5, + }, + ec_gf8_mul_3C_ops}; static ec_gf_op_t ec_gf8_mul_3D_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_3D = { - 8, - { 2, 3, 4, 5, 6, 7, 0, 1, }, - ec_gf8_mul_3D_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_3D = {8, + { + 2, + 3, + 4, + 5, + 6, + 7, + 0, + 1, + }, + ec_gf8_mul_3D_ops}; static ec_gf_op_t ec_gf8_mul_3E_ops[] = { - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_3E = { - 8, - { 6, 1, 2, 7, 0, 3, 5, 4, }, - ec_gf8_mul_3E_ops -}; + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, 
{EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_3E = {8, + { + 6, + 1, + 2, + 7, + 0, + 3, + 5, + 4, + }, + ec_gf8_mul_3E_ops}; static ec_gf_op_t ec_gf8_mul_3F_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_COPY, 10, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_COPY, 9, 2, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR3, 4, 9, 7 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 3, 10, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_3F = { - 11, - { 1, 7, 6, 2, 4, 3, 5, 0, 8, 9, 10, }, - ec_gf8_mul_3F_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_COPY, 10, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_COPY, 9, 2, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR3, 4, 9, 7}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 3, 10, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_3F = {11, + { + 1, + 7, + 6, + 2, + 4, + 3, + 5, + 0, + 8, + 9, + 10, + }, + ec_gf8_mul_3F_ops}; static ec_gf_op_t ec_gf8_mul_40_ops[] = { - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR3, 8, 7, 6 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_40 = { - 9, - { 5, 7, 4, 6, 2, 3, 0, 1, 8, }, - ec_gf8_mul_40_ops -}; + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR3, 8, 7, 6}, + {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_40 = {9, + { + 5, + 7, + 4, + 6, + 2, + 3, + 0, + 1, + 8, + }, + ec_gf8_mul_40_ops}; static ec_gf_op_t ec_gf8_mul_41_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 8, 4, 0 }, - { EC_GF_OP_XOR2, 8, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_41 = { - 9, - { 0, 7, 6, 5, 3, 4, 8, 1, 2, }, - ec_gf8_mul_41_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 4, 0}, + {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + 
{EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_41 = {9, + { + 0, + 7, + 6, + 5, + 3, + 4, + 8, + 1, + 2, + }, + ec_gf8_mul_41_ops}; static ec_gf_op_t ec_gf8_mul_42_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_42 = { - 9, - { 2, 7, 1, 6, 4, 3, 0, 5, 8, }, - ec_gf8_mul_42_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_42 = {9, + { + 2, + 7, + 1, + 6, + 4, + 3, + 0, + 5, + 8, + }, + ec_gf8_mul_42_ops}; static ec_gf_op_t ec_gf8_mul_43_ops[] = { - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_43 = { - 8, - { 2, 6, 4, 1, 7, 3, 0, 5, }, - ec_gf8_mul_43_ops -}; + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_43 = {8, + { + 2, + 6, + 4, + 1, + 7, + 3, + 0, + 5, + }, + ec_gf8_mul_43_ops}; static ec_gf_op_t ec_gf8_mul_44_ops[] = { - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_44 = { - 8, - { 2, 3, 4, 1, 6, 5, 0, 7, }, - ec_gf8_mul_44_ops -}; + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_44 = {8, + { + 2, + 3, + 4, + 1, + 6, + 5, + 0, + 7, + }, + ec_gf8_mul_44_ops}; static ec_gf_op_t ec_gf8_mul_45_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, 
- { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_45 = { - 8, - { 2, 3, 0, 1, 7, 4, 5, 6, }, - ec_gf8_mul_45_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_45 = {8, + { + 2, + 3, + 0, + 1, + 7, + 4, + 5, + 6, + }, + ec_gf8_mul_45_ops}; static ec_gf_op_t ec_gf8_mul_46_ops[] = { - { EC_GF_OP_XOR3, 8, 2, 4 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 8, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_46 = { - 9, - { 2, 0, 1, 3, 4, 5, 6, 7, 8, }, - ec_gf8_mul_46_ops -}; + {EC_GF_OP_XOR3, 8, 2, 4}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 8, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_46 = {9, + { + 2, + 0, + 1, + 3, + 4, + 5, + 6, + 7, + 8, + }, + ec_gf8_mul_46_ops}; static ec_gf_op_t ec_gf8_mul_47_ops[] = { - { EC_GF_OP_XOR3, 8, 0, 1 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_47 = { - 9, - { 2, 3, 4, 5, 6, 7, 0, 1, 8, }, - ec_gf8_mul_47_ops -}; + {EC_GF_OP_XOR3, 8, 0, 1}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_47 = {9, + { + 2, + 3, + 4, + 5, + 6, + 7, + 0, + 1, + 8, + }, + ec_gf8_mul_47_ops}; static ec_gf_op_t ec_gf8_mul_48_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_48 = { - 8, - { 4, 5, 6, 0, 1, 3, 7, 2, }, - ec_gf8_mul_48_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_48 = {8, + { + 4, + 5, + 6, + 0, + 1, + 3, + 7, + 2, + }, + ec_gf8_mul_48_ops}; static ec_gf_op_t ec_gf8_mul_49_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR3, 8, 0, 6 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 1, 4, 
0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR3, 1, 8, 5 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_49 = { - 9, - { 7, 2, 4, 0, 3, 5, 1, 6, 8, }, - ec_gf8_mul_49_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR3, 8, 0, 6}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR3, 1, 8, 5}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_49 = {9, + { + 7, + 2, + 4, + 0, + 3, + 5, + 1, + 6, + 8, + }, + ec_gf8_mul_49_ops}; static ec_gf_op_t ec_gf8_mul_4A_ops[] = { - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_4A = { - 8, - { 5, 6, 7, 0, 1, 3, 4, 2, }, - ec_gf8_mul_4A_ops -}; + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_4A = {8, + { + 5, + 6, + 7, + 0, + 1, + 3, + 4, + 2, + }, + ec_gf8_mul_4A_ops}; static ec_gf_op_t ec_gf8_mul_4B_ops[] = { - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR3, 8, 3, 7 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_4B = { - 9, - { 5, 3, 6, 7, 0, 2, 4, 1, 8, }, - ec_gf8_mul_4B_ops -}; + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR3, 8, 3, 7}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 8, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_4B = {9, + { + 5, + 3, + 6, + 7, + 0, + 2, + 4, + 1, + 8, + }, + ec_gf8_mul_4B_ops}; static ec_gf_op_t ec_gf8_mul_4C_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_4C = { - 8, - { 5, 3, 4, 7, 0, 6, 2, 1, }, - ec_gf8_mul_4C_ops -}; + {EC_GF_OP_XOR2, 5, 2, 
0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_4C = {8, + { + 5, + 3, + 4, + 7, + 0, + 6, + 2, + 1, + }, + ec_gf8_mul_4C_ops}; static ec_gf_op_t ec_gf8_mul_4D_ops[] = { - { EC_GF_OP_COPY, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR3, 9, 3, 1 }, - { EC_GF_OP_XOR2, 5, 9, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR3, 0, 8, 2 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_4D = { - 10, - { 0, 9, 3, 5, 6, 4, 7, 1, 2, 8, }, - ec_gf8_mul_4D_ops -}; + {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR3, 9, 3, 1}, + {EC_GF_OP_XOR2, 5, 9, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 0, 8, 2}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_4D = {10, + { + 0, + 9, + 3, + 5, + 6, + 4, + 7, + 1, + 2, + 8, + }, + ec_gf8_mul_4D_ops}; static ec_gf_op_t ec_gf8_mul_4E_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_4E = { - 8, - { 2, 3, 0, 1, 5, 6, 7, 4, }, - ec_gf8_mul_4E_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_4E = {8, + { + 2, + 3, + 0, + 1, + 5, + 6, + 7, + 4, + }, + ec_gf8_mul_4E_ops}; static ec_gf_op_t ec_gf8_mul_4F_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_4F = { - 8, - { 0, 3, 5, 6, 1, 2, 7, 4, }, - ec_gf8_mul_4F_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_4F = {8, + { 
+ 0, + 3, + 5, + 6, + 1, + 2, + 7, + 4, + }, + ec_gf8_mul_4F_ops}; static ec_gf_op_t ec_gf8_mul_50_ops[] = { - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_50 = { - 8, - { 4, 5, 7, 3, 0, 1, 2, 6, }, - ec_gf8_mul_50_ops -}; + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_50 = {8, + { + 4, + 5, + 7, + 3, + 0, + 1, + 2, + 6, + }, + ec_gf8_mul_50_ops}; static ec_gf_op_t ec_gf8_mul_51_ops[] = { - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_51 = { - 8, - { 0, 1, 7, 2, 3, 4, 5, 6, }, - ec_gf8_mul_51_ops -}; + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_51 = {8, + { + 0, + 1, + 7, + 2, + 3, + 4, + 5, + 6, + }, + ec_gf8_mul_51_ops}; static ec_gf_op_t ec_gf8_mul_52_ops[] = { - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_COPY, 9, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR3, 3, 5, 8 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 2, 9, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_52 = { - 10, - { 2, 3, 1, 4, 6, 7, 0, 5, 8, 9, }, - ec_gf8_mul_52_ops -}; + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_COPY, 9, 4, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR3, 3, 5, 8}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 2, 9, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_52 = {10, + { + 2, + 3, + 1, + 4, + 6, + 7, + 0, + 5, + 8, + 9, + }, + ec_gf8_mul_52_ops}; static ec_gf_op_t ec_gf8_mul_53_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 7, 2, 
0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_53 = { - 8, - { 2, 0, 1, 4, 5, 6, 7, 3, }, - ec_gf8_mul_53_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_53 = {8, + { + 2, + 0, + 1, + 4, + 5, + 6, + 7, + 3, + }, + ec_gf8_mul_53_ops}; static ec_gf_op_t ec_gf8_mul_54_ops[] = { - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_54 = { - 8, - { 7, 3, 0, 4, 2, 6, 5, 1, }, - ec_gf8_mul_54_ops -}; + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_54 = {8, + { + 7, + 3, + 0, + 4, + 2, + 6, + 5, + 1, + }, + ec_gf8_mul_54_ops}; static ec_gf_op_t ec_gf8_mul_55_ops[] = { - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_55 = { - 8, - { 1, 5, 6, 4, 3, 7, 2, 0, }, - ec_gf8_mul_55_ops -}; + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_55 = {8, + { + 1, + 5, + 6, + 4, + 3, + 7, + 2, + 0, + }, + ec_gf8_mul_55_ops}; static ec_gf_op_t ec_gf8_mul_56_ops[] = { - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_56 = { - 8, - { 2, 3, 0, 4, 5, 6, 7, 1, }, - ec_gf8_mul_56_ops -}; + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + 
{EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_56 = {8, + { + 2, + 3, + 0, + 4, + 5, + 6, + 7, + 1, + }, + ec_gf8_mul_56_ops}; static ec_gf_op_t ec_gf8_mul_57_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_57 = { - 8, - { 2, 3, 0, 1, 4, 5, 6, 7, }, - ec_gf8_mul_57_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_57 = {8, + { + 2, + 3, + 0, + 1, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_57_ops}; static ec_gf_op_t ec_gf8_mul_58_ops[] = { - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_58 = { - 8, - { 4, 3, 2, 7, 0, 1, 5, 6, }, - ec_gf8_mul_58_ops -}; + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_58 = {8, + { + 4, + 3, + 2, + 7, + 0, + 1, + 5, + 6, + }, + ec_gf8_mul_58_ops}; static ec_gf_op_t ec_gf8_mul_59_ops[] = { - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_59 = { - 8, - { 7, 3, 5, 6, 1, 2, 0, 4, }, - ec_gf8_mul_59_ops -}; + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_59 = {8, + { + 7, + 3, + 5, + 6, + 1, + 2, + 0, + 4, + }, + ec_gf8_mul_59_ops}; static ec_gf_op_t ec_gf8_mul_5A_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - 
{ EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_5A = { - 8, - { 6, 7, 0, 1, 2, 3, 5, 4, }, - ec_gf8_mul_5A_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_5A = {8, + { + 6, + 7, + 0, + 1, + 2, + 3, + 5, + 4, + }, + ec_gf8_mul_5A_ops}; static ec_gf_op_t ec_gf8_mul_5B_ops[] = { - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_5B = { - 8, - { 6, 0, 7, 5, 2, 1, 3, 4, }, - ec_gf8_mul_5B_ops -}; + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_5B = {8, + { + 6, + 0, + 7, + 5, + 2, + 1, + 3, + 4, + }, + ec_gf8_mul_5B_ops}; static ec_gf_op_t ec_gf8_mul_5C_ops[] = { - { EC_GF_OP_COPY, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_5C = { - 9, - { 7, 5, 2, 4, 1, 0, 6, 3, 8, }, - ec_gf8_mul_5C_ops -}; + {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_5C = {9, + { + 7, + 5, + 2, + 4, + 1, + 0, + 6, + 3, + 8, + }, + ec_gf8_mul_5C_ops}; static ec_gf_op_t ec_gf8_mul_5D_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - 
{ EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_5D = { - 8, - { 1, 3, 5, 4, 6, 7, 2, 0, }, - ec_gf8_mul_5D_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_5D = {8, + { + 1, + 3, + 5, + 4, + 6, + 7, + 2, + 0, + }, + ec_gf8_mul_5D_ops}; static ec_gf_op_t ec_gf8_mul_5E_ops[] = { - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_5E = { - 8, - { 4, 3, 6, 2, 5, 7, 0, 1, }, - ec_gf8_mul_5E_ops -}; + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_5E = {8, + { + 4, + 3, + 6, + 2, + 5, + 7, + 0, + 1, + }, + ec_gf8_mul_5E_ops}; static ec_gf_op_t ec_gf8_mul_5F_ops[] = { - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_5F = { - 8, - { 6, 1, 3, 4, 5, 7, 2, 0, }, - ec_gf8_mul_5F_ops -}; + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_5F = {8, + { + 6, + 1, + 3, + 4, + 5, + 7, + 2, + 0, + }, + ec_gf8_mul_5F_ops}; static ec_gf_op_t ec_gf8_mul_60_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_60 = { - 8, - { 2, 3, 4, 7, 5, 6, 0, 1, }, - ec_gf8_mul_60_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, 
{EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_60 = {8, + { + 2, + 3, + 4, + 7, + 5, + 6, + 0, + 1, + }, + ec_gf8_mul_60_ops}; static ec_gf_op_t ec_gf8_mul_61_ops[] = { - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_61 = { - 8, - { 0, 5, 6, 7, 4, 2, 1, 3, }, - ec_gf8_mul_61_ops -}; + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_61 = {8, + { + 0, + 5, + 6, + 7, + 4, + 2, + 1, + 3, + }, + ec_gf8_mul_61_ops}; static ec_gf_op_t ec_gf8_mul_62_ops[] = { - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_62 = { - 8, - { 2, 0, 3, 4, 5, 6, 7, 1, }, - ec_gf8_mul_62_ops -}; + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_62 = {8, + { + 2, + 0, + 3, + 4, + 5, + 6, + 7, + 1, + }, + ec_gf8_mul_62_ops}; static ec_gf_op_t ec_gf8_mul_63_ops[] = { - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_63 = { - 8, - { 3, 4, 6, 5, 7, 0, 1, 2, }, - ec_gf8_mul_63_ops -}; + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, 
{EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_63 = {8, + { + 3, + 4, + 6, + 5, + 7, + 0, + 1, + 2, + }, + ec_gf8_mul_63_ops}; static ec_gf_op_t ec_gf8_mul_64_ops[] = { - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 8, 0, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 8, 7, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_64 = { - 9, - { 2, 3, 4, 6, 5, 7, 8, 1, 0, }, - ec_gf8_mul_64_ops -}; + {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 0, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 8, 7, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_64 = {9, + { + 2, + 3, + 4, + 6, + 5, + 7, + 8, + 1, + 0, + }, + ec_gf8_mul_64_ops}; static ec_gf_op_t ec_gf8_mul_65_ops[] = { - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_65 = { - 8, - { 2, 5, 1, 3, 4, 0, 6, 7, }, - ec_gf8_mul_65_ops -}; + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_65 = {8, + { + 2, + 5, + 1, + 3, + 4, + 0, + 6, + 7, + }, + ec_gf8_mul_65_ops}; static ec_gf_op_t ec_gf8_mul_66_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_66 = { - 8, - { 2, 3, 1, 4, 5, 7, 0, 6, }, - ec_gf8_mul_66_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_66 = {8, 
+ { + 2, + 3, + 1, + 4, + 5, + 7, + 0, + 6, + }, + ec_gf8_mul_66_ops}; static ec_gf_op_t ec_gf8_mul_67_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_67 = { - 8, - { 2, 4, 5, 6, 7, 3, 1, 0, }, - ec_gf8_mul_67_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_67 = {8, + { + 2, + 4, + 5, + 6, + 7, + 3, + 1, + 0, + }, + ec_gf8_mul_67_ops}; static ec_gf_op_t ec_gf8_mul_68_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_68 = { - 8, - { 5, 7, 2, 3, 0, 6, 4, 1, }, - ec_gf8_mul_68_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_68 = {8, + { + 5, + 7, + 2, + 3, + 0, + 6, + 4, + 1, + }, + ec_gf8_mul_68_ops}; static ec_gf_op_t ec_gf8_mul_69_ops[] = { - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_69 = { - 8, - { 0, 1, 3, 2, 4, 5, 7, 6, }, - ec_gf8_mul_69_ops -}; + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_69 = {8, + { + 0, + 1, + 3, + 2, + 4, + 5, + 7, + 6, + }, + ec_gf8_mul_69_ops}; static ec_gf_op_t ec_gf8_mul_6A_ops[] = { - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { 
EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_6A = { - 8, - { 5, 7, 4, 6, 1, 2, 0, 3, }, - ec_gf8_mul_6A_ops -}; + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_6A = {8, + { + 5, + 7, + 4, + 6, + 1, + 2, + 0, + 3, + }, + ec_gf8_mul_6A_ops}; static ec_gf_op_t ec_gf8_mul_6B_ops[] = { - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_6B = { - 9, - { 6, 7, 2, 0, 3, 1, 5, 4, 8, }, - ec_gf8_mul_6B_ops -}; + {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_6B = {9, + { + 6, + 7, + 2, + 0, + 3, + 1, + 5, + 4, + 8, + }, + ec_gf8_mul_6B_ops}; static ec_gf_op_t ec_gf8_mul_6C_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_6C = { - 8, - { 5, 6, 7, 0, 1, 2, 3, 4, }, - ec_gf8_mul_6C_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_6C = {8, + { + 5, + 6, + 7, + 0, + 1, + 2, + 3, + 4, + }, + ec_gf8_mul_6C_ops}; static ec_gf_op_t ec_gf8_mul_6D_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR3, 8, 3, 4 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_6D = { - 9, - { 3, 6, 
7, 0, 4, 5, 1, 2, 8, }, - ec_gf8_mul_6D_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR3, 8, 3, 4}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_6D = {9, + { + 3, + 6, + 7, + 0, + 4, + 5, + 1, + 2, + 8, + }, + ec_gf8_mul_6D_ops}; static ec_gf_op_t ec_gf8_mul_6E_ops[] = { - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_6E = { - 8, - { 5, 6, 3, 1, 7, 2, 0, 4, }, - ec_gf8_mul_6E_ops -}; + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_6E = {8, + { + 5, + 6, + 3, + 1, + 7, + 2, + 0, + 4, + }, + ec_gf8_mul_6E_ops}; static ec_gf_op_t ec_gf8_mul_6F_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR3, 0, 8, 7 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_6F = { - 9, - { 2, 6, 3, 7, 0, 1, 4, 5, 8, }, - ec_gf8_mul_6F_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR3, 0, 8, 7}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_6F = {9, + { + 2, + 6, + 3, + 7, + 0, + 1, + 4, + 5, + 8, + }, + ec_gf8_mul_6F_ops}; static ec_gf_op_t ec_gf8_mul_70_ops[] = { - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_70 = { - 8, - { 3, 4, 5, 2, 6, 0, 1, 7, }, - ec_gf8_mul_70_ops -}; + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 6, 0, 
0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_70 = {8, + { + 3, + 4, + 5, + 2, + 6, + 0, + 1, + 7, + }, + ec_gf8_mul_70_ops}; static ec_gf_op_t ec_gf8_mul_71_ops[] = { - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_71 = { - 8, - { 4, 7, 5, 3, 6, 0, 2, 1, }, - ec_gf8_mul_71_ops -}; + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_71 = {8, + { + 4, + 7, + 5, + 3, + 6, + 0, + 2, + 1, + }, + ec_gf8_mul_71_ops}; static ec_gf_op_t ec_gf8_mul_72_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_72 = { - 8, - { 0, 5, 2, 7, 4, 1, 3, 6, }, - ec_gf8_mul_72_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_72 = {8, + { + 0, + 5, + 2, + 7, + 4, + 1, + 3, + 6, + }, + ec_gf8_mul_72_ops}; static ec_gf_op_t ec_gf8_mul_73_ops[] = { - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_73 = { - 8, - { 6, 0, 1, 7, 4, 5, 2, 3, }, - ec_gf8_mul_73_ops -}; + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_73 = {8, + { + 6, + 0, + 1, + 7, + 4, + 5, + 2, + 3, + }, + ec_gf8_mul_73_ops}; static ec_gf_op_t ec_gf8_mul_74_ops[] = { - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 7, 1, 
0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_74 = { - 8, - { 3, 2, 1, 0, 4, 5, 6, 7, }, - ec_gf8_mul_74_ops -}; + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_74 = {8, + { + 3, + 2, + 1, + 0, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_74_ops}; static ec_gf_op_t ec_gf8_mul_75_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_75 = { - 8, - { 4, 5, 6, 7, 0, 1, 2, 3, }, - ec_gf8_mul_75_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_75 = {8, + { + 4, + 5, + 6, + 7, + 0, + 1, + 2, + 3, + }, + ec_gf8_mul_75_ops}; static ec_gf_op_t ec_gf8_mul_76_ops[] = { - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR3, 8, 6, 2 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_76 = { - 9, - { 2, 3, 0, 6, 5, 1, 7, 8, 4, }, - ec_gf8_mul_76_ops -}; + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 8, 6, 2}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 8, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_76 = {9, + { + 2, + 3, + 0, + 6, + 5, + 1, + 7, + 8, + 4, + }, + ec_gf8_mul_76_ops}; static ec_gf_op_t ec_gf8_mul_77_ops[] = { - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_77 = { - 8, - { 7, 4, 3, 6, 0, 1, 5, 2, }, - ec_gf8_mul_77_ops -}; + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + 
{EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_77 = {8, + { + 7, + 4, + 3, + 6, + 0, + 1, + 5, + 2, + }, + ec_gf8_mul_77_ops}; static ec_gf_op_t ec_gf8_mul_78_ops[] = { - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR3, 8, 0, 2 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_78 = { - 9, - { 4, 7, 3, 2, 5, 1, 6, 0, 8, }, - ec_gf8_mul_78_ops -}; + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR3, 8, 0, 2}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_78 = {9, + { + 4, + 7, + 3, + 2, + 5, + 1, + 6, + 0, + 8, + }, + ec_gf8_mul_78_ops}; static ec_gf_op_t ec_gf8_mul_79_ops[] = { - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR3, 8, 4, 7 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_79 = { - 9, - { 4, 5, 7, 3, 1, 6, 2, 0, 8, }, - ec_gf8_mul_79_ops -}; + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR3, 8, 4, 7}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_79 = {9, + { + 4, + 5, + 7, + 3, + 1, + 6, + 2, + 0, + 8, + }, + ec_gf8_mul_79_ops}; static ec_gf_op_t ec_gf8_mul_7A_ops[] = { - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_7A = { - 8, - { 1, 2, 3, 4, 5, 6, 7, 0, }, - ec_gf8_mul_7A_ops -}; + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static 
ec_gf_mul_t ec_gf8_mul_7A = {8, + { + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 0, + }, + ec_gf8_mul_7A_ops}; static ec_gf_op_t ec_gf8_mul_7B_ops[] = { - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR3, 8, 5, 3 }, - { EC_GF_OP_XOR2, 8, 0, 0 }, - { EC_GF_OP_COPY, 9, 4, 0 }, - { EC_GF_OP_XOR2, 8, 2, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR3, 4, 1, 9 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_7B = { - 10, - { 1, 2, 3, 4, 8, 5, 6, 0, 7, 9, }, - ec_gf8_mul_7B_ops -}; + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR3, 8, 5, 3}, + {EC_GF_OP_XOR2, 8, 0, 0}, {EC_GF_OP_COPY, 9, 4, 0}, + {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 4, 1, 9}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_7B = {10, + { + 1, + 2, + 3, + 4, + 8, + 5, + 6, + 0, + 7, + 9, + }, + ec_gf8_mul_7B_ops}; static ec_gf_op_t ec_gf8_mul_7C_ops[] = { - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_7C = { - 8, - { 2, 4, 1, 6, 3, 5, 7, 0, }, - ec_gf8_mul_7C_ops -}; + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_7C = {8, + { + 2, + 4, + 1, + 6, + 3, + 5, + 7, + 0, + }, + ec_gf8_mul_7C_ops}; static ec_gf_op_t ec_gf8_mul_7D_ops[] = { - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_7D = { - 8, - { 1, 0, 3, 5, 6, 7, 2, 4, }, - ec_gf8_mul_7D_ops -}; + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_7D = {8, + { + 1, + 0, + 3, + 5, + 6, + 7, + 2, + 4, + }, + ec_gf8_mul_7D_ops}; static ec_gf_op_t ec_gf8_mul_7E_ops[] = { - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_COPY, 8, 0, 
0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR3, 6, 2, 7 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_7E = { - 9, - { 5, 1, 2, 0, 7, 3, 4, 6, 8, }, - ec_gf8_mul_7E_ops -}; + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_COPY, 8, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 6, 2, 7}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_7E = {9, + { + 5, + 1, + 2, + 0, + 7, + 3, + 4, + 6, + 8, + }, + ec_gf8_mul_7E_ops}; static ec_gf_op_t ec_gf8_mul_7F_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR3, 9, 7, 5 }, - { EC_GF_OP_XOR2, 2, 9, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 9, 0 }, - { EC_GF_OP_XOR3, 9, 6, 4 }, - { EC_GF_OP_XOR2, 7, 9, 0 }, - { EC_GF_OP_XOR2, 3, 9, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_7F = { - 10, - { 4, 1, 0, 5, 6, 7, 2, 3, 8, 9, }, - ec_gf8_mul_7F_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR3, 9, 7, 5}, {EC_GF_OP_XOR2, 2, 9, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 9, 0}, + {EC_GF_OP_XOR3, 9, 6, 4}, {EC_GF_OP_XOR2, 7, 9, 0}, + {EC_GF_OP_XOR2, 3, 9, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_7F = {10, + { + 4, + 1, + 0, + 5, + 6, + 7, + 2, + 3, + 8, + 9, + }, + ec_gf8_mul_7F_ops}; static ec_gf_op_t ec_gf8_mul_80_ops[] = { - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_80 = { - 8, - { 7, 5, 6, 4, 1, 2, 3, 0, }, - ec_gf8_mul_80_ops -}; + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_80 = {8, + { + 7, + 5, + 6, + 4, + 1, + 2, + 3, + 0, + }, + ec_gf8_mul_80_ops}; static ec_gf_op_t ec_gf8_mul_81_ops[] = { - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { 
EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_81 = { - 8, - { 2, 7, 4, 1, 5, 6, 3, 0, }, - ec_gf8_mul_81_ops -}; + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_81 = {8, + { + 2, + 7, + 4, + 1, + 5, + 6, + 3, + 0, + }, + ec_gf8_mul_81_ops}; static ec_gf_op_t ec_gf8_mul_82_ops[] = { - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_COPY, 8, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR3, 5, 8, 7 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_82 = { - 9, - { 6, 2, 7, 5, 1, 3, 4, 0, 8, }, - ec_gf8_mul_82_ops -}; + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR3, 5, 8, 7}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_82 = {9, + { + 6, + 2, + 7, + 5, + 1, + 3, + 4, + 0, + 8, + }, + ec_gf8_mul_82_ops}; static ec_gf_op_t ec_gf8_mul_83_ops[] = { - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_83 = { - 8, - { 3, 5, 6, 7, 1, 2, 4, 0, }, - ec_gf8_mul_83_ops -}; + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_83 = {8, + { + 3, + 5, + 6, + 7, + 1, + 2, + 4, + 0, + }, + ec_gf8_mul_83_ops}; static ec_gf_op_t ec_gf8_mul_84_ops[] = { - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { 
EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_84 = { - 8, - { 7, 6, 0, 4, 1, 5, 3, 2, }, - ec_gf8_mul_84_ops -}; + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_84 = {8, + { + 7, + 6, + 0, + 4, + 1, + 5, + 3, + 2, + }, + ec_gf8_mul_84_ops}; static ec_gf_op_t ec_gf8_mul_85_ops[] = { - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_85 = { - 8, - { 7, 6, 0, 3, 2, 4, 5, 1, }, - ec_gf8_mul_85_ops -}; + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_85 = {8, + { + 7, + 6, + 0, + 3, + 2, + 4, + 5, + 1, + }, + ec_gf8_mul_85_ops}; static ec_gf_op_t ec_gf8_mul_86_ops[] = { - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_86 = { - 8, - { 1, 2, 6, 4, 5, 7, 3, 0, }, - ec_gf8_mul_86_ops -}; + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_86 = {8, + { + 1, + 2, + 6, + 4, + 5, + 7, + 3, + 0, + }, + ec_gf8_mul_86_ops}; static ec_gf_op_t ec_gf8_mul_87_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 8, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR3, 5, 8, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_87 = { - 9, - { 1, 2, 3, 4, 5, 7, 6, 0, 8, }, - ec_gf8_mul_87_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_COPY, 8, 1, 0}, + {EC_GF_OP_XOR2, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, 
{EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR3, 5, 8, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 8, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_87 = {9, + { + 1, + 2, + 3, + 4, + 5, + 7, + 6, + 0, + 8, + }, + ec_gf8_mul_87_ops}; static ec_gf_op_t ec_gf8_mul_88_ops[] = { - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_88 = { - 8, - { 6, 7, 3, 1, 2, 4, 5, 0, }, - ec_gf8_mul_88_ops -}; + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_88 = {8, + { + 6, + 7, + 3, + 1, + 2, + 4, + 5, + 0, + }, + ec_gf8_mul_88_ops}; static ec_gf_op_t ec_gf8_mul_89_ops[] = { - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR3, 8, 5, 2 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_89 = { - 9, - { 2, 1, 6, 5, 7, 3, 4, 0, 8, }, - ec_gf8_mul_89_ops -}; + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR3, 8, 5, 2}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_89 = {9, + { + 2, + 1, + 6, + 5, + 7, + 3, + 4, + 0, + 8, + }, + ec_gf8_mul_89_ops}; static ec_gf_op_t ec_gf8_mul_8A_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_8A = { - 8, - { 1, 2, 3, 0, 6, 7, 4, 5, }, - ec_gf8_mul_8A_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_8A = {8, + { + 1, + 2, + 3, + 0, + 6, + 7, + 
4, + 5, + }, + ec_gf8_mul_8A_ops}; static ec_gf_op_t ec_gf8_mul_8B_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_8B = { - 8, - { 6, 1, 2, 3, 5, 7, 4, 0, }, - ec_gf8_mul_8B_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_8B = {8, + { + 6, + 1, + 2, + 3, + 5, + 7, + 4, + 0, + }, + ec_gf8_mul_8B_ops}; static ec_gf_op_t ec_gf8_mul_8C_ops[] = { - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_8C = { - 8, - { 1, 2, 0, 7, 3, 4, 5, 6, }, - ec_gf8_mul_8C_ops -}; + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_8C = {8, + { + 1, + 2, + 0, + 7, + 3, + 4, + 5, + 6, + }, + ec_gf8_mul_8C_ops}; static ec_gf_op_t ec_gf8_mul_8D_ops[] = { - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_8D = { - 8, - { 7, 1, 3, 2, 4, 5, 0, 6, }, - ec_gf8_mul_8D_ops -}; - -static ec_gf_op_t ec_gf8_mul_8E_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_8E = { - 8, - { 1, 2, 3, 4, 5, 6, 7, 0, }, - ec_gf8_mul_8E_ops -}; + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_8D = {8, + { + 7, + 1, + 3, + 2, + 4, + 5, + 0, + 6, + }, + ec_gf8_mul_8D_ops}; + +static ec_gf_op_t ec_gf8_mul_8E_ops[] = {{EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_8E = {8, + { 
+ 1, + 2, + 3, + 4, + 5, + 6, + 7, + 0, + }, + ec_gf8_mul_8E_ops}; static ec_gf_op_t ec_gf8_mul_8F_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_8F = { - 8, - { 1, 2, 3, 4, 5, 6, 7, 0, }, - ec_gf8_mul_8F_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_8F = {8, + { + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 0, + }, + ec_gf8_mul_8F_ops}; static ec_gf_op_t ec_gf8_mul_90_ops[] = { - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_90 = { - 8, - { 4, 5, 6, 7, 0, 1, 3, 2, }, - ec_gf8_mul_90_ops -}; + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_90 = {8, + { + 4, + 5, + 6, + 7, + 0, + 1, + 3, + 2, + }, + ec_gf8_mul_90_ops}; static ec_gf_op_t ec_gf8_mul_91_ops[] = { - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_COPY, 9, 1, 0 }, - { EC_GF_OP_COPY, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 9, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR3, 5, 8, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_91 = { - 10, - { 2, 3, 1, 4, 0, 6, 7, 5, 8, 9, }, - ec_gf8_mul_91_ops -}; + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_COPY, 9, 1, 0}, {EC_GF_OP_COPY, 8, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 9, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR3, 5, 8, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_91 = {10, + { + 2, + 3, + 1, + 4, + 0, + 6, + 7, + 5, + 8, + 9, + }, + ec_gf8_mul_91_ops}; static ec_gf_op_t ec_gf8_mul_92_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 
5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_92 = { - 8, - { 6, 7, 0, 1, 2, 3, 5, 4, }, - ec_gf8_mul_92_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_92 = {8, + { + 6, + 7, + 0, + 1, + 2, + 3, + 5, + 4, + }, + ec_gf8_mul_92_ops}; static ec_gf_op_t ec_gf8_mul_93_ops[] = { - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_93 = { - 8, - { 6, 4, 5, 1, 7, 2, 3, 0, }, - ec_gf8_mul_93_ops -}; + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_93 = {8, + { + 6, + 4, + 5, + 1, + 7, + 2, + 3, + 0, + }, + ec_gf8_mul_93_ops}; static ec_gf_op_t ec_gf8_mul_94_ops[] = { - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_94 = { - 8, - { 7, 5, 0, 2, 6, 1, 3, 4, }, - ec_gf8_mul_94_ops -}; + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_94 = {8, + { + 7, + 5, + 0, + 2, + 6, + 1, + 3, + 4, + }, + ec_gf8_mul_94_ops}; static ec_gf_op_t ec_gf8_mul_95_ops[] = { - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_95 = { - 8, - { 7, 6, 1, 3, 0, 4, 5, 2, }, - ec_gf8_mul_95_ops -}; + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 
0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_95 = {8, + { + 7, + 6, + 1, + 3, + 0, + 4, + 5, + 2, + }, + ec_gf8_mul_95_ops}; static ec_gf_op_t ec_gf8_mul_96_ops[] = { - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR3, 8, 0, 4 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_96 = { - 9, - { 4, 0, 1, 6, 7, 2, 3, 5, 8, }, - ec_gf8_mul_96_ops -}; + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR3, 8, 0, 4}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_96 = {9, + { + 4, + 0, + 1, + 6, + 7, + 2, + 3, + 5, + 8, + }, + ec_gf8_mul_96_ops}; static ec_gf_op_t ec_gf8_mul_97_ops[] = { - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_COPY, 8, 2, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 8, 6, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_97 = { - 9, - { 4, 5, 3, 6, 7, 1, 2, 0, 8, }, - ec_gf8_mul_97_ops -}; + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_COPY, 8, 2, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 8, 6, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_97 = {9, + { + 4, + 5, + 3, + 6, + 7, + 1, + 2, + 0, + 8, + }, + ec_gf8_mul_97_ops}; static ec_gf_op_t ec_gf8_mul_98_ops[] = { - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_98 = { - 8, - { 4, 2, 3, 6, 7, 5, 1, 0, }, - ec_gf8_mul_98_ops -}; + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, 
{EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_98 = {8, + { + 4, + 2, + 3, + 6, + 7, + 5, + 1, + 0, + }, + ec_gf8_mul_98_ops}; static ec_gf_op_t ec_gf8_mul_99_ops[] = { - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_99 = { - 8, - { 6, 5, 3, 7, 0, 1, 4, 2, }, - ec_gf8_mul_99_ops -}; + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_99 = {8, + { + 6, + 5, + 3, + 7, + 0, + 1, + 4, + 2, + }, + ec_gf8_mul_99_ops}; static ec_gf_op_t ec_gf8_mul_9A_ops[] = { - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR3, 8, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_9A = { - 9, - { 6, 3, 4, 0, 5, 1, 2, 7, 8, }, - ec_gf8_mul_9A_ops -}; + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR3, 8, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_9A = {9, + { + 6, + 3, + 4, + 0, + 5, + 1, + 2, + 7, + 8, + }, + ec_gf8_mul_9A_ops}; static ec_gf_op_t ec_gf8_mul_9B_ops[] = { - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_COPY, 9, 5, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR3, 8, 3, 2 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 3, 9, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_9B = { - 10, - { 4, 5, 8, 6, 7, 1, 2, 0, 3, 9, }, - ec_gf8_mul_9B_ops -}; + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_COPY, 9, 5, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR3, 8, 3, 2}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 3, 9, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_9B = {10, + { + 4, + 5, + 8, + 6, + 7, + 1, + 2, + 0, + 3, + 9, + }, + ec_gf8_mul_9B_ops}; static ec_gf_op_t 
ec_gf8_mul_9C_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_9C = { - 8, - { 3, 2, 1, 0, 4, 5, 6, 7, }, - ec_gf8_mul_9C_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_9C = {8, + { + 3, + 2, + 1, + 0, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_9C_ops}; static ec_gf_op_t ec_gf8_mul_9D_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_9D = { - 8, - { 0, 1, 2, 3, 7, 4, 5, 6, }, - ec_gf8_mul_9D_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_9D = {8, + { + 0, + 1, + 2, + 3, + 7, + 4, + 5, + 6, + }, + ec_gf8_mul_9D_ops}; static ec_gf_op_t ec_gf8_mul_9E_ops[] = { - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_COPY, 8, 7, 0 }, - { EC_GF_OP_XOR2, 8, 5, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_9E = { - 9, - { 4, 5, 3, 8, 6, 0, 2, 7, 1, }, - ec_gf8_mul_9E_ops -}; + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_COPY, 8, 7, 0}, + {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_9E = {9, + { + 4, + 5, + 3, + 8, + 6, + 0, + 2, + 7, + 1, + }, + ec_gf8_mul_9E_ops}; static ec_gf_op_t ec_gf8_mul_9F_ops[] = { - { EC_GF_OP_XOR3, 8, 1, 2 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_9F = { - 9, - { 4, 5, 6, 7, 0, 1, 2, 3, 8, }, - ec_gf8_mul_9F_ops -}; + {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 8, 3, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, 
{EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_9F = {9, + { + 4, + 5, + 6, + 7, + 0, + 1, + 2, + 3, + 8, + }, + ec_gf8_mul_9F_ops}; static ec_gf_op_t ec_gf8_mul_A0_ops[] = { - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A0 = { - 8, - { 3, 1, 6, 7, 5, 2, 4, 0, }, - ec_gf8_mul_A0_ops -}; + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A0 = {8, + { + 3, + 1, + 6, + 7, + 5, + 2, + 4, + 0, + }, + ec_gf8_mul_A0_ops}; static ec_gf_op_t ec_gf8_mul_A1_ops[] = { - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR3, 8, 0, 6 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A1 = { - 9, - { 7, 4, 1, 5, 6, 0, 2, 3, 8, }, - ec_gf8_mul_A1_ops -}; + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR3, 8, 0, 6}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A1 = {9, + { + 7, + 4, + 1, + 5, + 6, + 0, + 2, + 3, + 8, + }, + ec_gf8_mul_A1_ops}; static ec_gf_op_t ec_gf8_mul_A2_ops[] = { - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A2 = { - 8, - { 7, 0, 6, 3, 2, 1, 4, 5, }, - ec_gf8_mul_A2_ops -}; + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A2 = {8, + { + 7, + 0, + 6, + 3, + 2, + 1, + 4, + 5, + }, + ec_gf8_mul_A2_ops}; static ec_gf_op_t ec_gf8_mul_A3_ops[] = { - { 
EC_GF_OP_COPY, 8, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A3 = { - 9, - { 3, 7, 2, 6, 1, 4, 0, 5, 8, }, - ec_gf8_mul_A3_ops -}; + {EC_GF_OP_COPY, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 8, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A3 = {9, + { + 3, + 7, + 2, + 6, + 1, + 4, + 0, + 5, + 8, + }, + ec_gf8_mul_A3_ops}; static ec_gf_op_t ec_gf8_mul_A4_ops[] = { - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A4 = { - 8, - { 5, 6, 7, 2, 4, 3, 0, 1, }, - ec_gf8_mul_A4_ops -}; + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A4 = {8, + { + 5, + 6, + 7, + 2, + 4, + 3, + 0, + 1, + }, + ec_gf8_mul_A4_ops}; static ec_gf_op_t ec_gf8_mul_A5_ops[] = { - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR3, 8, 5, 6 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A5 = { - 9, - { 1, 4, 2, 5, 6, 7, 3, 0, 8, }, - ec_gf8_mul_A5_ops -}; + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR3, 8, 5, 6}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A5 = {9, + { + 1, + 4, + 2, + 5, + 6, + 7, + 3, + 0, + 8, + }, + ec_gf8_mul_A5_ops}; static ec_gf_op_t ec_gf8_mul_A6_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 
}, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A6 = { - 8, - { 1, 2, 0, 3, 4, 5, 6, 7, }, - ec_gf8_mul_A6_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A6 = {8, + { + 1, + 2, + 0, + 3, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_A6_ops}; static ec_gf_op_t ec_gf8_mul_A7_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A7 = { - 8, - { 0, 1, 2, 5, 6, 7, 3, 4, }, - ec_gf8_mul_A7_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A7 = {8, + { + 0, + 1, + 2, + 5, + 6, + 7, + 3, + 4, + }, + ec_gf8_mul_A7_ops}; static ec_gf_op_t ec_gf8_mul_A8_ops[] = { - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 8, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_COPY, 9, 4, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 9, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A8 = { - 10, - { 1, 7, 5, 8, 6, 3, 4, 0, 2, 9, }, - ec_gf8_mul_A8_ops -}; + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 1, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_COPY, 9, 4, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 8, 3, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 9, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A8 = {10, + { + 1, + 7, + 5, + 8, + 6, + 3, + 4, + 0, + 2, + 9, + }, + ec_gf8_mul_A8_ops}; static ec_gf_op_t ec_gf8_mul_A9_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_A9 = { - 8, - { 3, 7, 6, 1, 2, 0, 4, 5, }, - ec_gf8_mul_A9_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + 
{EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_A9 = {8, + { + 3, + 7, + 6, + 1, + 2, + 0, + 4, + 5, + }, + ec_gf8_mul_A9_ops}; static ec_gf_op_t ec_gf8_mul_AA_ops[] = { - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_AA = { - 8, - { 0, 4, 5, 3, 6, 7, 1, 2, }, - ec_gf8_mul_AA_ops -}; + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_AA = {8, + { + 0, + 4, + 5, + 3, + 6, + 7, + 1, + 2, + }, + ec_gf8_mul_AA_ops}; static ec_gf_op_t ec_gf8_mul_AB_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_COPY, 9, 6, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 8, 7, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR3, 3, 9, 7 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_AB = { - 10, - { 2, 3, 8, 0, 5, 6, 1, 4, 7, 9, }, - ec_gf8_mul_AB_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_COPY, 9, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 3, 8, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR3, 3, 9, 7}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_AB = {10, + { + 2, + 3, + 8, + 0, + 5, + 6, + 1, + 4, + 7, + 9, + }, + ec_gf8_mul_AB_ops}; static ec_gf_op_t ec_gf8_mul_AC_ops[] = { - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_AC = { - 8, - { 3, 2, 1, 0, 4, 5, 6, 7, }, - ec_gf8_mul_AC_ops -}; + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_AC = {8, 
+ { + 3, + 2, + 1, + 0, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_AC_ops}; static ec_gf_op_t ec_gf8_mul_AD_ops[] = { - { EC_GF_OP_XOR3, 8, 1, 2 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_AD = { - 9, - { 3, 4, 5, 6, 7, 0, 1, 2, 8, }, - ec_gf8_mul_AD_ops -}; + {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 8, 0}, + {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_AD = {9, + { + 3, + 4, + 5, + 6, + 7, + 0, + 1, + 2, + 8, + }, + ec_gf8_mul_AD_ops}; static ec_gf_op_t ec_gf8_mul_AE_ops[] = { - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_COPY, 8, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_AE = { - 9, - { 7, 0, 5, 6, 3, 4, 1, 2, 8, }, - ec_gf8_mul_AE_ops -}; + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_COPY, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_AE = {9, + { + 7, + 0, + 5, + 6, + 3, + 4, + 1, + 2, + 8, + }, + ec_gf8_mul_AE_ops}; static ec_gf_op_t ec_gf8_mul_AF_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_AF = { - 8, - { 0, 1, 2, 7, 3, 4, 5, 6, }, - ec_gf8_mul_AF_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_AF = {8, + { + 0, + 1, + 2, + 7, + 3, + 4, + 5, + 6, + }, + ec_gf8_mul_AF_ops}; static ec_gf_op_t ec_gf8_mul_B0_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B0 = { - 8, - { 4, 0, 7, 2, 3, 1, 6, 5, }, - 
ec_gf8_mul_B0_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B0 = {8, + { + 4, + 0, + 7, + 2, + 3, + 1, + 6, + 5, + }, + ec_gf8_mul_B0_ops}; static ec_gf_op_t ec_gf8_mul_B1_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_COPY, 8, 4, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR3, 5, 8, 1 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B1 = { - 9, - { 2, 6, 4, 7, 0, 1, 3, 5, 8, }, - ec_gf8_mul_B1_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_COPY, 8, 4, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR3, 5, 8, 1}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B1 = {9, + { + 2, + 6, + 4, + 7, + 0, + 1, + 3, + 5, + 8, + }, + ec_gf8_mul_B1_ops}; static ec_gf_op_t ec_gf8_mul_B2_ops[] = { - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR3, 8, 4, 5 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 8, 1, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B2 = { - 9, - { 0, 7, 4, 5, 6, 1, 2, 3, 8, }, - ec_gf8_mul_B2_ops -}; + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 8, 4, 5}, + {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 8, 1, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 3, 8, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B2 = {9, + { + 0, + 7, + 4, + 5, + 6, + 1, + 2, + 3, + 8, + }, + ec_gf8_mul_B2_ops}; static ec_gf_op_t ec_gf8_mul_B3_ops[] = { - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_COPY, 9, 5, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR3, 8, 6, 4 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 8, 5, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR3, 1, 9, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B3 = { - 10, - { 2, 3, 4, 5, 1, 6, 0, 7, 8, 9, }, - ec_gf8_mul_B3_ops -}; + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_COPY, 9, 5, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR3, 8, 6, 4}, + {EC_GF_OP_XOR2, 5, 3, 0}, 
{EC_GF_OP_XOR2, 8, 5, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR3, 1, 9, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B3 = {10, + { + 2, + 3, + 4, + 5, + 1, + 6, + 0, + 7, + 8, + 9, + }, + ec_gf8_mul_B3_ops}; static ec_gf_op_t ec_gf8_mul_B4_ops[] = { - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B4 = { - 8, - { 5, 6, 7, 0, 1, 2, 3, 4, }, - ec_gf8_mul_B4_ops -}; + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B4 = {8, + { + 5, + 6, + 7, + 0, + 1, + 2, + 3, + 4, + }, + ec_gf8_mul_B4_ops}; static ec_gf_op_t ec_gf8_mul_B5_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_COPY, 8, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR3, 4, 8, 3 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B5 = { - 9, - { 3, 4, 0, 7, 1, 5, 6, 2, 8, }, - ec_gf8_mul_B5_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR3, 4, 8, 3}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B5 = {9, + { + 3, + 4, + 0, + 7, + 1, + 5, + 6, + 2, + 8, + }, + ec_gf8_mul_B5_ops}; static ec_gf_op_t ec_gf8_mul_B6_ops[] = { - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B6 = { - 8, - { 5, 3, 6, 4, 7, 0, 1, 2, }, - ec_gf8_mul_B6_ops -}; + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, 
{EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B6 = {8, + { + 5, + 3, + 6, + 4, + 7, + 0, + 1, + 2, + }, + ec_gf8_mul_B6_ops}; static ec_gf_op_t ec_gf8_mul_B7_ops[] = { - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B7 = { - 8, - { 5, 0, 1, 4, 2, 6, 7, 3, }, - ec_gf8_mul_B7_ops -}; + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B7 = {8, + { + 5, + 0, + 1, + 4, + 2, + 6, + 7, + 3, + }, + ec_gf8_mul_B7_ops}; static ec_gf_op_t ec_gf8_mul_B8_ops[] = { - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B8 = { - 8, - { 6, 4, 5, 1, 2, 0, 7, 3, }, - ec_gf8_mul_B8_ops -}; + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B8 = {8, + { + 6, + 4, + 5, + 1, + 2, + 0, + 7, + 3, + }, + ec_gf8_mul_B8_ops}; static ec_gf_op_t ec_gf8_mul_B9_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR3, 0, 8, 2 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_B9 = { - 9, - { 6, 7, 0, 2, 1, 4, 5, 3, 8, }, - ec_gf8_mul_B9_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR3, 0, 8, 2}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_B9 = {9, + { + 6, + 7, + 0, + 2, + 1, + 4, + 5, + 3, + 8, + }, + ec_gf8_mul_B9_ops}; static ec_gf_op_t ec_gf8_mul_BA_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 
}, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_BA = { - 8, - { 1, 2, 4, 3, 5, 6, 0, 7, }, - ec_gf8_mul_BA_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_BA = {8, + { + 1, + 2, + 4, + 3, + 5, + 6, + 0, + 7, + }, + ec_gf8_mul_BA_ops}; static ec_gf_op_t ec_gf8_mul_BB_ops[] = { - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_COPY, 8, 3, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 8, 5, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 8, 7, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_BB = { - 9, - { 7, 2, 1, 8, 3, 5, 6, 4, 0, }, - ec_gf8_mul_BB_ops -}; + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_COPY, 8, 3, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 8, 5, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 2, 8, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_BB = {9, + { + 7, + 2, + 1, + 8, + 3, + 5, + 6, + 4, + 0, + }, + ec_gf8_mul_BB_ops}; static ec_gf_op_t ec_gf8_mul_BC_ops[] = { - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 8, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR3, 2, 8, 4 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_BC = { - 9, - { 2, 6, 3, 4, 5, 1, 7, 0, 8, }, - ec_gf8_mul_BC_ops -}; + {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 2, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR3, 2, 8, 4}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_BC = {9, + { + 2, + 6, + 3, + 4, + 5, + 1, + 7, + 0, + 8, + }, + ec_gf8_mul_BC_ops}; static ec_gf_op_t ec_gf8_mul_BD_ops[] = { - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 0, 
6, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_BD = { - 8, - { 4, 5, 0, 2, 7, 1, 6, 3, }, - ec_gf8_mul_BD_ops -}; + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_BD = {8, + { + 4, + 5, + 0, + 2, + 7, + 1, + 6, + 3, + }, + ec_gf8_mul_BD_ops}; static ec_gf_op_t ec_gf8_mul_BE_ops[] = { - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_BE = { - 8, - { 0, 6, 7, 4, 5, 1, 3, 2, }, - ec_gf8_mul_BE_ops -}; + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_BE = {8, + { + 0, + 6, + 7, + 4, + 5, + 1, + 3, + 2, + }, + ec_gf8_mul_BE_ops}; static ec_gf_op_t ec_gf8_mul_BF_ops[] = { - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_BF = { - 8, - { 5, 6, 1, 7, 3, 0, 2, 4, }, - ec_gf8_mul_BF_ops -}; + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_BF = {8, + { + 5, + 6, + 1, + 7, + 3, + 0, + 2, + 4, + }, + ec_gf8_mul_BF_ops}; static ec_gf_op_t ec_gf8_mul_C0_ops[] = { - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C0 = { - 8, - { 1, 2, 3, 4, 7, 5, 6, 0, }, - ec_gf8_mul_C0_ops -}; + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 
7, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C0 = {8, + { + 1, + 2, + 3, + 4, + 7, + 5, + 6, + 0, + }, + ec_gf8_mul_C0_ops}; static ec_gf_op_t ec_gf8_mul_C1_ops[] = { - { EC_GF_OP_XOR3, 8, 1, 2 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C1 = { - 9, - { 5, 6, 7, 4, 1, 2, 3, 0, 8, }, - ec_gf8_mul_C1_ops -}; + {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 8, 3, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C1 = {9, + { + 5, + 6, + 7, + 4, + 1, + 2, + 3, + 0, + 8, + }, + ec_gf8_mul_C1_ops}; static ec_gf_op_t ec_gf8_mul_C2_ops[] = { - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C2 = { - 8, - { 7, 6, 3, 0, 1, 4, 5, 2, }, - ec_gf8_mul_C2_ops -}; + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C2 = {8, + { + 7, + 6, + 3, + 0, + 1, + 4, + 5, + 2, + }, + ec_gf8_mul_C2_ops}; static ec_gf_op_t ec_gf8_mul_C3_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR3, 0, 2, 6 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR3, 9, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 7, 9, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C3 = { - 10, - { 5, 6, 4, 7, 1, 2, 3, 0, 8, 9, }, - ec_gf8_mul_C3_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR3, 0, 2, 6}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR3, 9, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 9, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + 
+static ec_gf_mul_t ec_gf8_mul_C3 = {10, + { + 5, + 6, + 4, + 7, + 1, + 2, + 3, + 0, + 8, + 9, + }, + ec_gf8_mul_C3_ops}; static ec_gf_op_t ec_gf8_mul_C4_ops[] = { - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C4 = { - 8, - { 0, 2, 1, 3, 4, 5, 6, 7, }, - ec_gf8_mul_C4_ops -}; + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C4 = {8, + { + 0, + 2, + 1, + 3, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_C4_ops}; static ec_gf_op_t ec_gf8_mul_C5_ops[] = { - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C5 = { - 8, - { 4, 3, 5, 7, 6, 2, 0, 1, }, - ec_gf8_mul_C5_ops -}; + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C5 = {8, + { + 4, + 3, + 5, + 7, + 6, + 2, + 0, + 1, + }, + ec_gf8_mul_C5_ops}; static ec_gf_op_t ec_gf8_mul_C6_ops[] = { - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_COPY, 8, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR3, 9, 5, 4 }, - { EC_GF_OP_XOR2, 6, 9, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 7, 9, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C6 = { - 10, - { 6, 3, 0, 4, 5, 7, 2, 1, 8, 9, }, - ec_gf8_mul_C6_ops -}; + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_COPY, 8, 4, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR3, 9, 5, 4}, + {EC_GF_OP_XOR2, 6, 9, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 7, 9, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 8, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C6 = {10, + { + 6, + 3, + 0, + 4, + 5, + 7, + 2, + 1, + 8, + 9, + }, + ec_gf8_mul_C6_ops}; static ec_gf_op_t 
ec_gf8_mul_C7_ops[] = { - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C7 = { - 8, - { 7, 0, 6, 2, 5, 3, 4, 1, }, - ec_gf8_mul_C7_ops -}; + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C7 = {8, + { + 7, + 0, + 6, + 2, + 5, + 3, + 4, + 1, + }, + ec_gf8_mul_C7_ops}; static ec_gf_op_t ec_gf8_mul_C8_ops[] = { - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C8 = { - 8, - { 1, 3, 2, 4, 6, 7, 5, 0, }, - ec_gf8_mul_C8_ops -}; + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C8 = {8, + { + 1, + 3, + 2, + 4, + 6, + 7, + 5, + 0, + }, + ec_gf8_mul_C8_ops}; static ec_gf_op_t ec_gf8_mul_C9_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_C9 = { - 8, - { 2, 3, 4, 5, 6, 7, 0, 1, }, - ec_gf8_mul_C9_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_C9 = {8, + { + 2, + 3, + 4, + 5, + 6, + 7, + 0, + 1, + }, + ec_gf8_mul_C9_ops}; static ec_gf_op_t ec_gf8_mul_CA_ops[] = { - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_CA = { - 8, - { 1, 2, 5, 7, 3, 4, 0, 6, }, - ec_gf8_mul_CA_ops -}; + {EC_GF_OP_XOR2, 6, 7, 0}, 
{EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_CA = {8, + { + 1, + 2, + 5, + 7, + 3, + 4, + 0, + 6, + }, + ec_gf8_mul_CA_ops}; static ec_gf_op_t ec_gf8_mul_CB_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_CB = { - 8, - { 2, 3, 4, 5, 7, 6, 0, 1, }, - ec_gf8_mul_CB_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_CB = {8, + { + 2, + 3, + 4, + 5, + 7, + 6, + 0, + 1, + }, + ec_gf8_mul_CB_ops}; static ec_gf_op_t ec_gf8_mul_CC_ops[] = { - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_CC = { - 8, - { 2, 7, 1, 0, 5, 6, 3, 4, }, - ec_gf8_mul_CC_ops -}; + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_CC = {8, + { + 2, + 7, + 1, + 0, + 5, + 6, + 3, + 4, + }, + ec_gf8_mul_CC_ops}; static ec_gf_op_t ec_gf8_mul_CD_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_CD = { - 8, - { 0, 6, 1, 2, 7, 3, 4, 5, }, - ec_gf8_mul_CD_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, 
{EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_CD = {8, + { + 0, + 6, + 1, + 2, + 7, + 3, + 4, + 5, + }, + ec_gf8_mul_CD_ops}; static ec_gf_op_t ec_gf8_mul_CE_ops[] = { - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_COPY, 8, 7, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR3, 3, 6, 8 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR3, 8, 2, 3 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_CE = { - 9, - { 5, 7, 3, 0, 2, 6, 4, 1, 8, }, - ec_gf8_mul_CE_ops -}; + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_COPY, 8, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR3, 3, 6, 8}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR3, 8, 2, 3}, + {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 4, 8, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_CE = {9, + { + 5, + 7, + 3, + 0, + 2, + 6, + 4, + 1, + 8, + }, + ec_gf8_mul_CE_ops}; static ec_gf_op_t ec_gf8_mul_CF_ops[] = { - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_CF = { - 8, - { 3, 6, 7, 0, 2, 4, 5, 1, }, - ec_gf8_mul_CF_ops -}; + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_CF = {8, + { + 3, + 6, + 7, + 0, + 2, + 4, + 5, + 1, + }, + ec_gf8_mul_CF_ops}; static ec_gf_op_t ec_gf8_mul_D0_ops[] = { - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D0 = { - 8, - { 5, 6, 7, 2, 0, 3, 1, 4, }, - ec_gf8_mul_D0_ops -}; + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D0 = {8, + { + 
5, + 6, + 7, + 2, + 0, + 3, + 1, + 4, + }, + ec_gf8_mul_D0_ops}; static ec_gf_op_t ec_gf8_mul_D1_ops[] = { - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR3, 8, 6, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D1 = { - 9, - { 5, 6, 3, 2, 0, 7, 4, 1, 8, }, - ec_gf8_mul_D1_ops -}; + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR3, 8, 6, 0}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D1 = {9, + { + 5, + 6, + 3, + 2, + 0, + 7, + 4, + 1, + 8, + }, + ec_gf8_mul_D1_ops}; static ec_gf_op_t ec_gf8_mul_D2_ops[] = { - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D2 = { - 8, - { 7, 0, 2, 1, 3, 4, 6, 5, }, - ec_gf8_mul_D2_ops -}; + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D2 = {8, + { + 7, + 0, + 2, + 1, + 3, + 4, + 6, + 5, + }, + ec_gf8_mul_D2_ops}; static ec_gf_op_t ec_gf8_mul_D3_ops[] = { - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_COPY, 8, 4, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 8, 6, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D3 = { - 9, - { 0, 3, 2, 8, 4, 6, 7, 1, 5, }, - ec_gf8_mul_D3_ops -}; + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_COPY, 8, 4, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 8, 6, 0}, {EC_GF_OP_XOR2, 3, 8, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D3 = {9, + { + 0, + 3, + 2, + 8, + 4, + 6, + 7, + 1, + 5, + }, + ec_gf8_mul_D3_ops}; static ec_gf_op_t ec_gf8_mul_D4_ops[] = { - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { 
EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR3, 1, 7, 8 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D4 = { - 9, - { 4, 1, 7, 5, 0, 6, 3, 2, 8, }, - ec_gf8_mul_D4_ops -}; + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_COPY, 8, 1, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR3, 1, 7, 8}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D4 = {9, + { + 4, + 1, + 7, + 5, + 0, + 6, + 3, + 2, + 8, + }, + ec_gf8_mul_D4_ops}; static ec_gf_op_t ec_gf8_mul_D5_ops[] = { - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D5 = { - 8, - { 6, 7, 4, 5, 2, 3, 1, 0, }, - ec_gf8_mul_D5_ops -}; + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D5 = {8, + { + 6, + 7, + 4, + 5, + 2, + 3, + 1, + 0, + }, + ec_gf8_mul_D5_ops}; static ec_gf_op_t ec_gf8_mul_D6_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D6 = { - 9, - { 0, 6, 2, 7, 1, 3, 4, 5, 8, }, - ec_gf8_mul_D6_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D6 = {9, + { + 0, + 6, + 2, + 7, + 1, + 3, + 4, + 5, + 8, + }, + ec_gf8_mul_D6_ops}; static ec_gf_op_t ec_gf8_mul_D7_ops[] = { - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR3, 8, 3, 5 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 3, 
6, 0 }, - { EC_GF_OP_XOR3, 6, 7, 8 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D7 = { - 9, - { 3, 4, 6, 5, 0, 7, 1, 2, 8, }, - ec_gf8_mul_D7_ops -}; + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR3, 8, 3, 5}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR3, 6, 7, 8}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D7 = {9, + { + 3, + 4, + 6, + 5, + 0, + 7, + 1, + 2, + 8, + }, + ec_gf8_mul_D7_ops}; static ec_gf_op_t ec_gf8_mul_D8_ops[] = { - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D8 = { - 8, - { 4, 5, 6, 7, 0, 1, 2, 3, }, - ec_gf8_mul_D8_ops -}; + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D8 = {8, + { + 4, + 5, + 6, + 7, + 0, + 1, + 2, + 3, + }, + ec_gf8_mul_D8_ops}; static ec_gf_op_t ec_gf8_mul_D9_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_D9 = { - 8, - { 1, 2, 6, 7, 4, 5, 0, 3, }, - ec_gf8_mul_D9_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 0, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_D9 = {8, + { + 1, + 2, + 6, + 7, + 4, + 5, + 0, + 3, + }, + ec_gf8_mul_D9_ops}; static ec_gf_op_t ec_gf8_mul_DA_ops[] = { - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR3, 8, 2, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_DA = { - 9, - { 2, 5, 7, 1, 0, 4, 3, 6, 8, }, - ec_gf8_mul_DA_ops -}; + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR3, 8, 2, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 2, 4, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_DA = {9, + { + 2, + 5, + 7, + 1, + 0, + 4, + 3, + 6, + 8, + }, + 
ec_gf8_mul_DA_ops}; static ec_gf_op_t ec_gf8_mul_DB_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 8, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_DB = { - 9, - { 7, 5, 6, 2, 3, 4, 1, 0, 8, }, - ec_gf8_mul_DB_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 8, 4, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 8, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_DB = {9, + { + 7, + 5, + 6, + 2, + 3, + 4, + 1, + 0, + 8, + }, + ec_gf8_mul_DB_ops}; static ec_gf_op_t ec_gf8_mul_DC_ops[] = { - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_DC = { - 8, - { 4, 5, 2, 6, 7, 1, 0, 3, }, - ec_gf8_mul_DC_ops -}; + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_DC = {8, + { + 4, + 5, + 2, + 6, + 7, + 1, + 0, + 3, + }, + ec_gf8_mul_DC_ops}; static ec_gf_op_t ec_gf8_mul_DD_ops[] = { - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_DD = { - 8, - { 1, 2, 3, 6, 7, 0, 4, 5, }, - ec_gf8_mul_DD_ops -}; + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_DD = {8, + { + 1, + 2, + 3, + 6, + 7, + 0, + 4, + 5, + }, + ec_gf8_mul_DD_ops}; static ec_gf_op_t ec_gf8_mul_DE_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 
7, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_DE = { - 8, - { 0, 5, 2, 6, 7, 1, 3, 4, }, - ec_gf8_mul_DE_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_DE = {8, + { + 0, + 5, + 2, + 6, + 7, + 1, + 3, + 4, + }, + ec_gf8_mul_DE_ops}; static ec_gf_op_t ec_gf8_mul_DF_ops[] = { - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_COPY, 9, 0, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 8, 7, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR3, 1, 9, 2 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_DF = { - 10, - { 7, 2, 8, 4, 3, 1, 0, 6, 5, 9, }, - ec_gf8_mul_DF_ops -}; + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 8, 3, 0}, + {EC_GF_OP_COPY, 9, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR3, 1, 9, 2}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_DF = {10, + { + 7, + 2, + 8, + 4, + 3, + 1, + 0, + 6, + 5, + 9, + }, + ec_gf8_mul_DF_ops}; static ec_gf_op_t ec_gf8_mul_E0_ops[] = { - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E0 = { - 8, - { 2, 3, 4, 7, 5, 6, 0, 1, }, - ec_gf8_mul_E0_ops -}; + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0}, + {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E0 = {8, + { + 2, + 3, + 4, + 7, + 5, + 6, + 0, + 1, + }, + ec_gf8_mul_E0_ops}; static ec_gf_op_t ec_gf8_mul_E1_ops[] = { - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 8, 7, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR3, 9, 5, 3 }, - { EC_GF_OP_XOR2, 0, 9, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 4, 9, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E1 = 
{ - 10, - { 0, 7, 1, 3, 4, 5, 6, 2, 8, 9, }, - ec_gf8_mul_E1_ops -}; + {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 7, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR3, 9, 5, 3}, + {EC_GF_OP_XOR2, 0, 9, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 4, 9, 0}, {EC_GF_OP_XOR2, 0, 2, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 8, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E1 = {10, + { + 0, + 7, + 1, + 3, + 4, + 5, + 6, + 2, + 8, + 9, + }, + ec_gf8_mul_E1_ops}; static ec_gf_op_t ec_gf8_mul_E2_ops[] = { - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E2 = { - 8, - { 2, 3, 7, 1, 5, 6, 0, 4, }, - ec_gf8_mul_E2_ops -}; + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E2 = {8, + { + 2, + 3, + 7, + 1, + 5, + 6, + 0, + 4, + }, + ec_gf8_mul_E2_ops}; static ec_gf_op_t ec_gf8_mul_E3_ops[] = { - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR3, 8, 2, 7 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 0, 8, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR3, 6, 8, 4 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E3 = { - 9, - { 5, 4, 7, 2, 1, 3, 6, 0, 8, }, - ec_gf8_mul_E3_ops -}; + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0}, + {EC_GF_OP_XOR3, 8, 2, 7}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR3, 6, 8, 4}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E3 = {9, + { + 5, + 4, + 7, + 2, + 1, + 3, + 6, + 0, + 8, + }, + ec_gf8_mul_E3_ops}; static ec_gf_op_t ec_gf8_mul_E4_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 4, 5, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E4 = { - 8, - { 7, 0, 1, 6, 3, 4, 2, 5, }, - ec_gf8_mul_E4_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0}, + {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, 
{EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E4 = {8, + { + 7, + 0, + 1, + 6, + 3, + 4, + 2, + 5, + }, + ec_gf8_mul_E4_ops}; static ec_gf_op_t ec_gf8_mul_E5_ops[] = { - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E5 = { - 9, - { 4, 5, 3, 6, 7, 1, 0, 2, 8, }, - ec_gf8_mul_E5_ops -}; + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E5 = {9, + { + 4, + 5, + 3, + 6, + 7, + 1, + 0, + 2, + 8, + }, + ec_gf8_mul_E5_ops}; static ec_gf_op_t ec_gf8_mul_E6_ops[] = { - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E6 = { - 8, - { 5, 4, 3, 6, 7, 0, 1, 2, }, - ec_gf8_mul_E6_ops -}; + {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E6 = {8, + { + 5, + 4, + 3, + 6, + 7, + 0, + 1, + 2, + }, + ec_gf8_mul_E6_ops}; static ec_gf_op_t ec_gf8_mul_E7_ops[] = { - { EC_GF_OP_COPY, 8, 6, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR3, 9, 0, 6 }, - { EC_GF_OP_XOR2, 4, 9, 0 }, - { EC_GF_OP_XOR2, 5, 9, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E7 = { - 10, - { 1, 4, 3, 6, 7, 5, 2, 0, 8, 9, }, - ec_gf8_mul_E7_ops -}; + {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR3, 9, 0, 6}, {EC_GF_OP_XOR2, 4, 9, 0}, + {EC_GF_OP_XOR2, 5, 9, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E7 = {10, + { + 1, + 4, + 3, + 6, 
+ 7, + 5, + 2, + 0, + 8, + 9, + }, + ec_gf8_mul_E7_ops}; static ec_gf_op_t ec_gf8_mul_E8_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 1, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E8 = { - 8, - { 1, 4, 2, 7, 3, 0, 5, 6, }, - ec_gf8_mul_E8_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E8 = {8, + { + 1, + 4, + 2, + 7, + 3, + 0, + 5, + 6, + }, + ec_gf8_mul_E8_ops}; static ec_gf_op_t ec_gf8_mul_E9_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_COPY, 8, 1, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 6, 3, 0 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR3, 1, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_E9 = { - 9, - { 6, 2, 0, 3, 4, 1, 5, 7, 8, }, - ec_gf8_mul_E9_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_COPY, 8, 1, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0}, + {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR3, 1, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_E9 = {9, + { + 6, + 2, + 0, + 3, + 4, + 1, + 5, + 7, + 8, + }, + ec_gf8_mul_E9_ops}; static ec_gf_op_t ec_gf8_mul_EA_ops[] = { - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_EA = { - 8, - { 3, 4, 5, 6, 7, 0, 1, 2, }, - ec_gf8_mul_EA_ops -}; + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_EA = {8, + { + 3, + 4, + 5, + 6, + 7, + 0, + 1, + 2, + }, + ec_gf8_mul_EA_ops}; static ec_gf_op_t ec_gf8_mul_EB_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { 
EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_EB = { - 8, - { 3, 4, 5, 6, 7, 0, 1, 2, }, - ec_gf8_mul_EB_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_EB = {8, + { + 3, + 4, + 5, + 6, + 7, + 0, + 1, + 2, + }, + ec_gf8_mul_EB_ops}; static ec_gf_op_t ec_gf8_mul_EC_ops[] = { - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR3, 8, 4, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_EC = { - 9, - { 7, 4, 3, 0, 2, 5, 1, 6, 8, }, - ec_gf8_mul_EC_ops -}; + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR3, 8, 4, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_EC = {9, + { + 7, + 4, + 3, + 0, + 2, + 5, + 1, + 6, + 8, + }, + ec_gf8_mul_EC_ops}; static ec_gf_op_t ec_gf8_mul_ED_ops[] = { - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_ED = { - 8, - { 5, 6, 7, 0, 1, 4, 3, 2, }, - ec_gf8_mul_ED_ops -}; + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0}, + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_ED = {8, + { + 5, + 6, + 7, + 0, + 1, + 4, + 3, + 2, + }, + ec_gf8_mul_ED_ops}; static ec_gf_op_t ec_gf8_mul_EE_ops[] = { - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR3, 8, 2, 3 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 8, 5, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_EE = { - 9, - { 6, 4, 5, 7, 2, 3, 0, 1, 8, }, - ec_gf8_mul_EE_ops -}; + {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR3, 8, 2, 3}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 4, 8, 0}, + {EC_GF_OP_XOR2, 6, 
4, 0}, {EC_GF_OP_XOR2, 8, 5, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_EE = {9, + { + 6, + 4, + 5, + 7, + 2, + 3, + 0, + 1, + 8, + }, + ec_gf8_mul_EE_ops}; static ec_gf_op_t ec_gf8_mul_EF_ops[] = { - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_COPY, 8, 0, 0 }, - { EC_GF_OP_XOR2, 8, 2, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 6, 8, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_EF = { - 9, - { 6, 4, 5, 7, 2, 0, 3, 1, 8, }, - ec_gf8_mul_EF_ops -}; + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_COPY, 8, 0, 0}, + {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 6, 8, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_EF = {9, + { + 6, + 4, + 5, + 7, + 2, + 0, + 3, + 1, + 8, + }, + ec_gf8_mul_EF_ops}; static ec_gf_op_t ec_gf8_mul_F0_ops[] = { - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR3, 8, 3, 6 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 8, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 1, 8, 0 }, - { EC_GF_OP_XOR2, 0, 2, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F0 = { - 9, - { 3, 4, 6, 1, 2, 0, 5, 7, 8, }, - ec_gf8_mul_F0_ops -}; + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR3, 8, 3, 6}, + {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 8, 4, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 8, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 1, 8, 0}, + {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F0 = {9, + { + 3, + 4, + 6, + 1, + 2, + 0, + 5, + 7, + 8, + }, + ec_gf8_mul_F0_ops}; static ec_gf_op_t ec_gf8_mul_F1_ops[] = { - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_COPY, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_COPY, 9, 2, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 9, 0, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 5, 2, 0 }, - { EC_GF_OP_XOR2, 7, 9, 0 }, - { EC_GF_OP_XOR2, 4, 9, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR3, 9, 8, 7 }, - { EC_GF_OP_XOR2, 1, 9, 0 }, - { EC_GF_OP_XOR2, 5, 9, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F1 = { - 10, - { 7, 2, 6, 3, 5, 1, 4, 0, 8, 9, }, - ec_gf8_mul_F1_ops -}; + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_COPY, 9, 2, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 9, 0, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0}, + 
{EC_GF_OP_XOR2, 7, 9, 0}, {EC_GF_OP_XOR2, 4, 9, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR3, 9, 8, 7}, + {EC_GF_OP_XOR2, 1, 9, 0}, {EC_GF_OP_XOR2, 5, 9, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F1 = {10, + { + 7, + 2, + 6, + 3, + 5, + 1, + 4, + 0, + 8, + 9, + }, + ec_gf8_mul_F1_ops}; static ec_gf_op_t ec_gf8_mul_F2_ops[] = { - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 2, 3, 0 }, - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_XOR3, 8, 6, 4 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 8, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F2 = { - 9, - { 1, 0, 6, 7, 4, 5, 2, 3, 8, }, - ec_gf8_mul_F2_ops -}; + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0}, + {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0}, + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR3, 8, 6, 4}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F2 = {9, + { + 1, + 0, + 6, + 7, + 4, + 5, + 2, + 3, + 8, + }, + ec_gf8_mul_F2_ops}; static ec_gf_op_t ec_gf8_mul_F3_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F3 = { - 8, - { 5, 6, 7, 0, 1, 2, 3, 4, }, - ec_gf8_mul_F3_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F3 = {8, + { + 5, + 6, + 7, + 0, + 1, + 2, + 3, + 4, + }, + ec_gf8_mul_F3_ops}; static ec_gf_op_t ec_gf8_mul_F4_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F4 = { - 8, - { 0, 1, 2, 3, 4, 5, 6, 7, }, - ec_gf8_mul_F4_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 3, 7, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F4 = {8, + { + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + }, + ec_gf8_mul_F4_ops}; static ec_gf_op_t ec_gf8_mul_F5_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { 
EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F5 = { - 8, - { 7, 0, 1, 2, 3, 4, 5, 6, }, - ec_gf8_mul_F5_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F5 = {8, + { + 7, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + }, + ec_gf8_mul_F5_ops}; static ec_gf_op_t ec_gf8_mul_F6_ops[] = { - { EC_GF_OP_XOR2, 3, 1, 0 }, - { EC_GF_OP_COPY, 8, 3, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_COPY, 9, 3, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 9, 4, 0 }, - { EC_GF_OP_XOR2, 4, 1, 0 }, - { EC_GF_OP_XOR2, 6, 9, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 5, 7, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR3, 7, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F6 = { - 10, - { 0, 6, 2, 7, 4, 3, 5, 9, 1, 8, }, - ec_gf8_mul_F6_ops -}; + {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_COPY, 8, 3, 0}, + {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_COPY, 9, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 9, 4, 0}, {EC_GF_OP_XOR2, 4, 1, 0}, + {EC_GF_OP_XOR2, 6, 9, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 7, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR3, 7, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F6 = {10, + { + 0, + 6, + 2, + 7, + 4, + 3, + 5, + 9, + 1, + 8, + }, + ec_gf8_mul_F6_ops}; static ec_gf_op_t ec_gf8_mul_F7_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F7 = { - 8, - { 6, 7, 0, 1, 2, 3, 4, 5, }, - ec_gf8_mul_F7_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0}, + {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F7 = {8, + { + 6, + 7, + 0, + 1, + 2, + 3, + 4, + 5, + }, + ec_gf8_mul_F7_ops}; static ec_gf_op_t ec_gf8_mul_F8_ops[] = { - { EC_GF_OP_XOR2, 4, 0, 0 }, - { EC_GF_OP_XOR2, 3, 5, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F8 = { - 8, - { 6, 2, 0, 1, 4, 5, 3, 7, }, - ec_gf8_mul_F8_ops -}; + {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 5, 0}, + {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 5, 1, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 
7, 5, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 7, 0}, + {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F8 = {8, + { + 6, + 2, + 0, + 1, + 4, + 5, + 3, + 7, + }, + ec_gf8_mul_F8_ops}; static ec_gf_op_t ec_gf8_mul_F9_ops[] = { - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 6, 4, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR3, 8, 7, 1 }, - { EC_GF_OP_XOR2, 1, 3, 0 }, - { EC_GF_OP_XOR2, 4, 8, 0 }, - { EC_GF_OP_XOR2, 5, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_F9 = { - 9, - { 4, 1, 7, 6, 0, 3, 5, 2, 8, }, - ec_gf8_mul_F9_ops -}; + {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR3, 8, 7, 1}, {EC_GF_OP_XOR2, 1, 3, 0}, + {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_F9 = {9, + { + 4, + 1, + 7, + 6, + 0, + 3, + 5, + 2, + 8, + }, + ec_gf8_mul_F9_ops}; static ec_gf_op_t ec_gf8_mul_FA_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 7, 2, 0 }, - { EC_GF_OP_XOR2, 1, 5, 0 }, - { EC_GF_OP_XOR2, 3, 7, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 0, 3, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_FA = { - 8, - { 0, 1, 2, 4, 5, 6, 7, 3, }, - ec_gf8_mul_FA_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0}, + {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0}, + {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0}, + {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_FA = {8, + { + 0, + 1, + 2, + 4, + 5, + 6, + 7, + 3, + }, + ec_gf8_mul_FA_ops}; static ec_gf_op_t ec_gf8_mul_FB_ops[] = { - { EC_GF_OP_XOR2, 1, 0, 0 }, - { EC_GF_OP_XOR2, 2, 1, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 3, 2, 0 }, - { EC_GF_OP_XOR2, 0, 7, 0 }, - { EC_GF_OP_XOR2, 2, 7, 0 }, - { EC_GF_OP_XOR2, 1, 6, 0 }, - { EC_GF_OP_XOR2, 7, 6, 0 }, - { EC_GF_OP_XOR2, 4, 3, 0 }, - { EC_GF_OP_XOR2, 6, 5, 0 }, - { EC_GF_OP_XOR2, 7, 4, 0 }, - { EC_GF_OP_XOR2, 5, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_FB = { - 8, - { 4, 5, 6, 7, 0, 1, 2, 3, }, - ec_gf8_mul_FB_ops -}; + {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0}, + {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 7, 0}, + {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0}, + {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0}, + {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_FB = {8, + { + 4, + 5, + 6, + 7, + 0, + 1, + 2, + 3, + }, + ec_gf8_mul_FB_ops}; static ec_gf_op_t ec_gf8_mul_FC_ops[] = { - { EC_GF_OP_XOR2, 7, 0, 0 }, - { EC_GF_OP_XOR2, 
7, 4, 0 }, - { EC_GF_OP_XOR2, 5, 1, 0 }, - { EC_GF_OP_COPY, 9, 3, 0 }, - { EC_GF_OP_XOR3, 8, 5, 7 }, - { EC_GF_OP_XOR2, 3, 6, 0 }, - { EC_GF_OP_XOR2, 8, 3, 0 }, - { EC_GF_OP_XOR2, 2, 8, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 3, 4, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 6, 0, 0 }, - { EC_GF_OP_XOR3, 0, 9, 2 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_FC = { - 10, - { 5, 6, 3, 7, 1, 8, 0, 4, 2, 9, }, - ec_gf8_mul_FC_ops -}; + {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 7, 4, 0}, + {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_COPY, 9, 3, 0}, + {EC_GF_OP_XOR3, 8, 5, 7}, {EC_GF_OP_XOR2, 3, 6, 0}, + {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 8, 0}, + {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 2, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0}, + {EC_GF_OP_XOR3, 0, 9, 2}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_FC = {10, + { + 5, + 6, + 3, + 7, + 1, + 8, + 0, + 4, + 2, + 9, + }, + ec_gf8_mul_FC_ops}; static ec_gf_op_t ec_gf8_mul_FD_ops[] = { - { EC_GF_OP_XOR2, 7, 1, 0 }, - { EC_GF_OP_COPY, 8, 7, 0 }, - { EC_GF_OP_XOR2, 5, 0, 0 }, - { EC_GF_OP_XOR2, 7, 5, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 2, 5, 0 }, - { EC_GF_OP_XOR2, 1, 2, 0 }, - { EC_GF_OP_XOR2, 0, 1, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR3, 1, 8, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_FD = { - 9, - { 5, 3, 7, 6, 1, 2, 4, 0, 8, }, - ec_gf8_mul_FD_ops -}; + {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_COPY, 8, 7, 0}, + {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0}, + {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0}, + {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0}, + {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0}, + {EC_GF_OP_XOR3, 1, 8, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_FD = {9, + { + 5, + 3, + 7, + 6, + 1, + 2, + 4, + 0, + 8, + }, + ec_gf8_mul_FD_ops}; static ec_gf_op_t ec_gf8_mul_FE_ops[] = { - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_COPY, 8, 2, 0 }, - { EC_GF_OP_XOR2, 2, 4, 0 }, - { EC_GF_OP_XOR2, 6, 2, 0 }, - { EC_GF_OP_XOR2, 8, 5, 0 }, - { EC_GF_OP_XOR2, 5, 6, 0 }, - { EC_GF_OP_XOR2, 6, 1, 0 }, - { EC_GF_OP_XOR2, 0, 6, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 7, 8, 0 }, - { EC_GF_OP_XOR2, 3, 0, 0 }, - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR2, 0, 4, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_FE = { - 9, - { 3, 4, 8, 2, 5, 0, 6, 1, 7, }, - ec_gf8_mul_FE_ops -}; + {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_COPY, 8, 2, 0}, + {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0}, + {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0}, + {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 3, 0, 0}, + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_FE = {9, + { + 3, + 4, + 8, + 2, + 5, + 0, + 6, + 1, + 7, + }, + ec_gf8_mul_FE_ops}; static ec_gf_op_t ec_gf8_mul_FF_ops[] = { - { EC_GF_OP_XOR2, 4, 7, 0 }, - { EC_GF_OP_COPY, 9, 0, 0 }, - 
{ EC_GF_OP_COPY, 8, 4, 0 }, - { EC_GF_OP_XOR2, 9, 1, 0 }, - { EC_GF_OP_XOR2, 4, 2, 0 }, - { EC_GF_OP_XOR2, 9, 4, 0 }, - { EC_GF_OP_XOR2, 0, 5, 0 }, - { EC_GF_OP_XOR2, 2, 0, 0 }, - { EC_GF_OP_XOR2, 3, 9, 0 }, - { EC_GF_OP_XOR2, 7, 3, 0 }, - { EC_GF_OP_XOR2, 2, 6, 0 }, - { EC_GF_OP_XOR2, 5, 3, 0 }, - { EC_GF_OP_XOR2, 6, 7, 0 }, - { EC_GF_OP_XOR2, 1, 7, 0 }, - { EC_GF_OP_XOR3, 3, 8, 5 }, - { EC_GF_OP_XOR2, 4, 6, 0 }, - { EC_GF_OP_END, 0, 0, 0 } -}; - -static ec_gf_mul_t ec_gf8_mul_FF = { - 10, - { 6, 5, 0, 1, 2, 4, 9, 3, 7, 8, }, - ec_gf8_mul_FF_ops -}; + {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_COPY, 9, 0, 0}, + {EC_GF_OP_COPY, 8, 4, 0}, {EC_GF_OP_XOR2, 9, 1, 0}, + {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 9, 4, 0}, + {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0}, + {EC_GF_OP_XOR2, 3, 9, 0}, {EC_GF_OP_XOR2, 7, 3, 0}, + {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0}, + {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0}, + {EC_GF_OP_XOR3, 3, 8, 5}, {EC_GF_OP_XOR2, 4, 6, 0}, + {EC_GF_OP_END, 0, 0, 0}}; + +static ec_gf_mul_t ec_gf8_mul_FF = {10, + { + 6, + 5, + 0, + 1, + 2, + 4, + 9, + 3, + 7, + 8, + }, + ec_gf8_mul_FF_ops}; ec_gf_mul_t *ec_gf8_mul[] = { &ec_gf8_mul_00, &ec_gf8_mul_01, &ec_gf8_mul_02, &ec_gf8_mul_03, @@ -5955,5 +5879,4 @@ ec_gf_mul_t *ec_gf8_mul[] = { &ec_gf8_mul_F0, &ec_gf8_mul_F1, &ec_gf8_mul_F2, &ec_gf8_mul_F3, &ec_gf8_mul_F4, &ec_gf8_mul_F5, &ec_gf8_mul_F6, &ec_gf8_mul_F7, &ec_gf8_mul_F8, &ec_gf8_mul_F9, &ec_gf8_mul_FA, &ec_gf8_mul_FB, - &ec_gf8_mul_FC, &ec_gf8_mul_FD, &ec_gf8_mul_FE, &ec_gf8_mul_FF -}; + &ec_gf8_mul_FC, &ec_gf8_mul_FD, &ec_gf8_mul_FE, &ec_gf8_mul_FF}; diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 976018c9e29..229c0683d91 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -27,84 +27,104 @@ #include "ec-fops.h" #include "ec-heald.h" -#define EC_COUNT(array, max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res; }) -#define EC_INTERSECT(dst, src1, src2, max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i]; }) -#define EC_ADJUST_SOURCE(source, sources, max) ({int __i; if (sources[source] == 0) {source = -1; for (__i = 0; __i < max; __i++) if (sources[__i]) source = __i; } }) -#define IA_EQUAL(f, s, field) (memcmp (&(f.ia_##field), &(s.ia_##field), sizeof (s.ia_##field)) == 0) -#define EC_REPLIES_ALLOC(replies, numsubvols) do { \ - int __i = 0; \ - replies = alloca0(numsubvols * sizeof (*replies)); \ - for (__i = 0; __i < numsubvols; __i++) \ - INIT_LIST_HEAD (&replies[__i].entries.list); \ - } while (0) - +#define EC_COUNT(array, max) \ + ({ \ + int __i; \ + int __res = 0; \ + for (__i = 0; __i < max; __i++) \ + if (array[__i]) \ + __res++; \ + __res; \ + }) +#define EC_INTERSECT(dst, src1, src2, max) \ + ({ \ + int __i; \ + for (__i = 0; __i < max; __i++) \ + dst[__i] = src1[__i] && src2[__i]; \ + }) +#define EC_ADJUST_SOURCE(source, sources, max) \ + ({ \ + int __i; \ + if (sources[source] == 0) { \ + source = -1; \ + for (__i = 0; __i < max; __i++) \ + if (sources[__i]) \ + source = __i; \ + } \ + }) +#define IA_EQUAL(f, s, field) \ + (memcmp(&(f.ia_##field), &(s.ia_##field), sizeof(s.ia_##field)) == 0) +#define EC_REPLIES_ALLOC(replies, numsubvols) \ + do { \ + int __i = 0; \ + replies = alloca0(numsubvols * sizeof(*replies)); \ + for (__i = 0; __i < numsubvols; __i++) \ + INIT_LIST_HEAD(&replies[__i].entries.list); \ + } while (0) struct ec_name_data { - call_frame_t *frame; - unsigned char *participants; - 
unsigned char *failed_on; - unsigned char *gfidless; - unsigned char *enoent; - unsigned char *same; - char *name; - inode_t *parent; - default_args_cbk_t *replies; + call_frame_t *frame; + unsigned char *participants; + unsigned char *failed_on; + unsigned char *gfidless; + unsigned char *enoent; + unsigned char *same; + char *name; + inode_t *parent; + default_args_cbk_t *replies; }; -static char *ec_ignore_xattrs[] = { - GF_SELINUX_XATTR_KEY, - QUOTA_SIZE_KEY, - NULL -}; +static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL}; static gf_boolean_t -ec_ignorable_key_match (dict_t *dict, char *key, data_t *val, void *mdata) +ec_ignorable_key_match(dict_t *dict, char *key, data_t *val, void *mdata) { - int i = 0; + int i = 0; - if (!key) - goto out; + if (!key) + goto out; - if (strncmp (key, EC_XATTR_PREFIX, SLEN (EC_XATTR_PREFIX)) == 0) - return _gf_true; + if (strncmp(key, EC_XATTR_PREFIX, SLEN(EC_XATTR_PREFIX)) == 0) + return _gf_true; - for (i = 0; ec_ignore_xattrs[i]; i++) { - if (!strcmp (key, ec_ignore_xattrs[i])) - return _gf_true; - } + for (i = 0; ec_ignore_xattrs[i]; i++) { + if (!strcmp(key, ec_ignore_xattrs[i])) + return _gf_true; + } out: - return _gf_false; + return _gf_false; } static gf_boolean_t -ec_sh_key_match (dict_t *dict, char *key, data_t *val, void *mdata) +ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata) { - return !ec_ignorable_key_match (dict, key, val, mdata); + return !ec_ignorable_key_match(dict, key, val, mdata); } /* FOP: heal */ -uintptr_t ec_heal_check(ec_fop_data_t * fop, uintptr_t * pgood) +uintptr_t +ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood) { - ec_cbk_data_t * cbk; - uintptr_t mask[2] = { 0, 0 }; + ec_cbk_data_t *cbk; + uintptr_t mask[2] = {0, 0}; list_for_each_entry(cbk, &fop->cbk_list, list) { mask[cbk->op_ret >= 0] |= cbk->mask; } - if (pgood != NULL) - { + if (pgood != NULL) { *pgood = mask[1]; } return mask[0]; } -void ec_heal_update(ec_fop_data_t * fop, int32_t is_open) +void +ec_heal_update(ec_fop_data_t *fop, int32_t is_open) { - ec_heal_t * heal = fop->data; + ec_heal_t *heal = fop->data; uintptr_t good, bad; bad = ec_heal_check(fop, &good); @@ -112,8 +132,7 @@ void ec_heal_update(ec_fop_data_t * fop, int32_t is_open) LOCK(&heal->lock); heal->bad &= ~bad; - if (is_open) - { + if (is_open) { heal->open |= good; } @@ -122,9 +141,10 @@ void ec_heal_update(ec_fop_data_t * fop, int32_t is_open) fop->error = 0; } -void ec_heal_avoid(ec_fop_data_t * fop) +void +ec_heal_avoid(ec_fop_data_t *fop) { - ec_heal_t * heal = fop->data; + ec_heal_t *heal = fop->data; uintptr_t bad; bad = ec_heal_check(fop, NULL); @@ -136,22 +156,24 @@ void ec_heal_avoid(ec_fop_data_t * fop) UNLOCK(&heal->lock); } -int32_t ec_heal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +int32_t +ec_heal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { ec_fop_data_t *fop = cookie; ec_heal_t *heal = fop->data; if (op_ret >= 0) { - GF_ASSERT(ec_set_inode_size(heal->fop, heal->fd->inode, - heal->total_size)); + GF_ASSERT( + ec_set_inode_size(heal->fop, heal->fd->inode, heal->total_size)); } return 0; } -void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc, - off_t offset, size_t size) +void +ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc, off_t offset, + size_t size) { struct gf_flock flock; fop_inodelk_cbk_t cbk = NULL; @@ -176,15 +198,12 @@ void ec_heal_lock(ec_heal_t *heal, 
int32_t type, fd_t *fd, loc_t *loc, cbk = ec_heal_lock_cbk; } - if (fd != NULL) - { + if (fd != NULL) { ec_finodelk(heal->fop->frame, heal->xl, &heal->fop->frame->root->lk_owner, heal->fop->mask, EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd, F_SETLKW, &flock, NULL); - } - else - { + } else { ec_inodelk(heal->fop->frame, heal->xl, &heal->fop->frame->root->lk_owner, heal->fop->mask, EC_MINIMUM_ALL, cbk, heal, heal->xl->name, loc, F_SETLKW, @@ -192,28 +211,28 @@ void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc, } } -void ec_heal_inodelk(ec_heal_t *heal, int32_t type, int32_t use_fd, - off_t offset, size_t size) +void +ec_heal_inodelk(ec_heal_t *heal, int32_t type, int32_t use_fd, off_t offset, + size_t size) { ec_heal_lock(heal, type, use_fd ? heal->fd : NULL, &heal->loc, offset, size); } int32_t -ec_heal_xattr_clean (dict_t *dict, char *key, data_t *data, - void *arg) +ec_heal_xattr_clean(dict_t *dict, char *key, data_t *data, void *arg) { - dict_t *base = arg; + dict_t *base = arg; - if (ec_ignorable_key_match (NULL, key, NULL, NULL)) { - dict_del (dict, key); - return 0; - } + if (ec_ignorable_key_match(NULL, key, NULL, NULL)) { + dict_del(dict, key); + return 0; + } - if (dict_get (base, key) != NULL) - dict_del (dict, key); + if (dict_get(base, key) != NULL) + dict_del(dict, key); - return 0; + return 0; } /******************************************************************** @@ -223,68 +242,66 @@ ec_heal_xattr_clean (dict_t *dict, char *key, data_t *data, * *******************************************************************/ void -ec_wind_xattrop_parallel (call_frame_t *frame, xlator_t *subvol, - int child_index, loc_t *loc, - gf_xattrop_flags_t flags, dict_t **dict, - dict_t *xdata) +ec_wind_xattrop_parallel(call_frame_t *frame, xlator_t *subvol, int child_index, + loc_t *loc, gf_xattrop_flags_t flags, dict_t **dict, + dict_t *xdata) { - gf_msg_debug ("EC", 0, "WIND: on child %d ", child_index); - STACK_WIND_COOKIE (frame, cluster_xattrop_cbk, - (void *)(uintptr_t) child_index, - subvol, subvol->fops->xattrop, loc, - flags, dict[child_index], xdata); + gf_msg_debug("EC", 0, "WIND: on child %d ", child_index); + STACK_WIND_COOKIE( + frame, cluster_xattrop_cbk, (void *)(uintptr_t)child_index, subvol, + subvol->fops->xattrop, loc, flags, dict[child_index], xdata); } int32_t -ec_heal_writev_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) +ec_heal_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { ec_fop_data_t *fop = cookie; ec_heal_t *heal = fop->data; ec_trace("WRITE_CBK", cookie, "ret=%d, errno=%d", op_ret, op_errno); - gf_msg_debug (fop->xl->name, 0, "%s: write op_ret %d, op_errno %s" - " at %"PRIu64, uuid_utoa (heal->fd->inode->gfid), op_ret, - strerror (op_errno), heal->offset); + gf_msg_debug(fop->xl->name, 0, + "%s: write op_ret %d, op_errno %s" + " at %" PRIu64, + uuid_utoa(heal->fd->inode->gfid), op_ret, strerror(op_errno), + heal->offset); ec_heal_update(cookie, 0); return 0; } -int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, - struct iovec * vector, int32_t count, - struct iatt * stbuf, struct iobref * iobref, - dict_t * xdata) +int32_t +ec_heal_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, 
struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { - ec_fop_data_t * fop = cookie; - ec_heal_t * heal = fop->data; + ec_fop_data_t *fop = cookie; + ec_heal_t *heal = fop->data; ec_trace("READ_CBK", fop, "ret=%d, errno=%d", op_ret, op_errno); ec_heal_avoid(fop); - if (op_ret > 0) - { - gf_msg_debug (fop->xl->name, 0, "%s: read succeeded, proceeding " - "to write at %"PRIu64, uuid_utoa (heal->fd->inode->gfid), - heal->offset); + if (op_ret > 0) { + gf_msg_debug(fop->xl->name, 0, + "%s: read succeeded, proceeding " + "to write at %" PRIu64, + uuid_utoa(heal->fd->inode->gfid), heal->offset); ec_writev(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE, ec_heal_writev_cbk, heal, heal->fd, vector, count, heal->offset, 0, iobref, NULL); - } - else - { + } else { if (op_ret < 0) { - gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing " - "to heal block at %"PRIu64, - uuid_utoa (heal->fd->inode->gfid), strerror (op_errno), - heal->offset); - heal->bad = 0; + gf_msg_debug(fop->xl->name, 0, + "%s: read failed %s, failing " + "to heal block at %" PRIu64, + uuid_utoa(heal->fd->inode->gfid), strerror(op_errno), + heal->offset); + heal->bad = 0; } heal->done = 1; } @@ -292,31 +309,31 @@ int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, return 0; } -void ec_heal_data_block(ec_heal_t *heal) +void +ec_heal_data_block(ec_heal_t *heal) { ec_trace("DATA", heal->fop, "good=%lX, bad=%lX", heal->good, heal->bad); if ((heal->good != 0) && (heal->bad != 0) && - (heal->iatt.ia_type == IA_IFREG)) - { + (heal->iatt.ia_type == IA_IFREG)) { ec_readv(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN, - ec_heal_readv_cbk, heal, heal->fd, heal->size, heal->offset, - 0, NULL); + ec_heal_readv_cbk, heal, heal->fd, heal->size, heal->offset, 0, + NULL); } } /* FOP: fheal */ -void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_fheal_cbk_t func, void * data, fd_t * fd, - int32_t partial, dict_t *xdata) +void +ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial, + dict_t *xdata) { - ec_fd_t * ctx = ec_fd_get(fd, this); + ec_fd_t *ctx = ec_fd_get(fd, this); - if (ctx != NULL) - { - gf_msg_trace ("ec", 0, "FHEAL ctx: flags=%X, open=%lX", ctx->flags, - ctx->open); + if (ctx != NULL) { + gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%lX", ctx->flags, + ctx->open); ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial, xdata); } @@ -324,1624 +341,1601 @@ void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target, /* Common heal code */ void -ec_mask_to_char_array (uintptr_t mask, unsigned char *array, int numsubvols) +ec_mask_to_char_array(uintptr_t mask, unsigned char *array, int numsubvols) { - int i = 0; + int i = 0; - for (i = 0; i < numsubvols; i++) - array[i] = ((mask >> i) & 1); + for (i = 0; i < numsubvols; i++) + array[i] = ((mask >> i) & 1); } uintptr_t -ec_char_array_to_mask (unsigned char *array, int numsubvols) +ec_char_array_to_mask(unsigned char *array, int numsubvols) { - int i = 0; - uintptr_t mask = 0; + int i = 0; + uintptr_t mask = 0; - if (array == NULL) - goto out; + if (array == NULL) + goto out; - for (i = 0; i < numsubvols; i++) - if (array[i]) - mask |= (1ULL<nodes; i++) { - if (!replies[i].valid) - continue; - - if (replies[i].op_ret == -1) - continue; - - if (source == -1) - source = i; - - ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_VERSION, - xattr, EC_VERSION_SIZE); - if (ret == 
0) { - versions[i] = xattr[EC_DATA_TXN]; - if (max_version < versions[i]) { - max_version = versions[i]; - source = i; - } - } + for (i = 0; i < ec->nodes; i++) { + if (!replies[i].valid) + continue; - memset (xattr, 0, sizeof(xattr)); - ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_DIRTY, - xattr, EC_VERSION_SIZE); - if (ret == 0) { - dirty[i] = xattr[EC_DATA_TXN]; - } + if (replies[i].op_ret == -1) + continue; + + if (source == -1) + source = i; + + ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_VERSION, xattr, + EC_VERSION_SIZE); + if (ret == 0) { + versions[i] = xattr[EC_DATA_TXN]; + if (max_version < versions[i]) { + max_version = versions[i]; + source = i; + } } - if (source < 0) - goto out; + memset(xattr, 0, sizeof(xattr)); + ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_DIRTY, xattr, + EC_VERSION_SIZE); + if (ret == 0) { + dirty[i] = xattr[EC_DATA_TXN]; + } + } - for (i = 0; i < ec->nodes; i++) { - if (!replies[i].valid) - continue; + if (source < 0) + goto out; - if (replies[i].op_ret == -1) - continue; + for (i = 0; i < ec->nodes; i++) { + if (!replies[i].valid) + continue; - if (versions[i] == versions[source]) - sources[i] = 1; - else - healed_sinks[i] = 1; - } + if (replies[i].op_ret == -1) + continue; + + if (versions[i] == versions[source]) + sources[i] = 1; + else + healed_sinks[i] = 1; + } out: - return source; + return source; } int -ec_adjust_versions (call_frame_t *frame, ec_t *ec, ec_txn_t type, - inode_t *inode, int source, unsigned char *sources, - unsigned char *healed_sinks, uint64_t *versions, - uint64_t *dirty) -{ - int i = 0; - int ret = 0; - int call_count = 0; - dict_t **xattr = NULL; - int op_ret = 0; - loc_t loc = {0}; - gf_boolean_t erase_dirty = _gf_false; - uint64_t *versions_xattr = NULL; - uint64_t *dirty_xattr = NULL; - uint64_t allzero[2] = {0}; - unsigned char *on = NULL; - unsigned char *output = NULL; - default_args_cbk_t *replies = NULL; - - /* Allocate the required memory */ - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - on = alloca0 (ec->nodes); - output = alloca0 (ec->nodes); - EC_REPLIES_ALLOC (replies, ec->nodes); - xattr = GF_CALLOC (ec->nodes, sizeof (*xattr), gf_common_mt_pointer); - if (!xattr) { - op_ret = -ENOMEM; - goto out; - } - for (i = 0; i < ec->nodes; i++) { - xattr[i] = dict_new (); - if (!xattr[i]) { - op_ret = -ENOMEM; - goto out; - } +ec_adjust_versions(call_frame_t *frame, ec_t *ec, ec_txn_t type, inode_t *inode, + int source, unsigned char *sources, + unsigned char *healed_sinks, uint64_t *versions, + uint64_t *dirty) +{ + int i = 0; + int ret = 0; + int call_count = 0; + dict_t **xattr = NULL; + int op_ret = 0; + loc_t loc = {0}; + gf_boolean_t erase_dirty = _gf_false; + uint64_t *versions_xattr = NULL; + uint64_t *dirty_xattr = NULL; + uint64_t allzero[2] = {0}; + unsigned char *on = NULL; + unsigned char *output = NULL; + default_args_cbk_t *replies = NULL; + + /* Allocate the required memory */ + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + EC_REPLIES_ALLOC(replies, ec->nodes); + xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer); + if (!xattr) { + op_ret = -ENOMEM; + goto out; + } + for (i = 0; i < ec->nodes; i++) { + xattr[i] = dict_new(); + if (!xattr[i]) { + op_ret = -ENOMEM; + goto out; } + } - /* dirty xattr represents if the file/dir needs heal. 
Unless all the - * copies are healed, don't erase it */ - if (EC_COUNT (sources, ec->nodes) + - EC_COUNT (healed_sinks, ec->nodes) == ec->nodes) - erase_dirty = _gf_true; - else - op_ret = -ENOTCONN; - - /* Populate the xattr array */ - for (i = 0; i < ec->nodes; i++) { - if (!sources[i] && !healed_sinks[i]) - continue; - versions_xattr = GF_CALLOC (EC_VERSION_SIZE, - sizeof(*versions_xattr), - gf_common_mt_pointer); - if (!versions_xattr) { - op_ret = -ENOMEM; - continue; - } - - versions_xattr[type] = hton64(versions[source] - versions[i]); - ret = dict_set_bin (xattr[i], EC_XATTR_VERSION, - versions_xattr, - (sizeof (*versions_xattr) * EC_VERSION_SIZE) - ); - if (ret < 0) { - op_ret = -ENOMEM; - continue; - } - - if (erase_dirty) { - dirty_xattr = GF_CALLOC (EC_VERSION_SIZE, - sizeof(*dirty_xattr), - gf_common_mt_pointer); - if (!dirty_xattr) { - op_ret = -ENOMEM; - continue; - } - - dirty_xattr[type] = hton64(-dirty[i]); - ret = dict_set_bin (xattr[i], EC_XATTR_DIRTY, - dirty_xattr, - (sizeof(*dirty_xattr) * - EC_VERSION_SIZE) - ); - if (ret < 0) { - op_ret = -ENOMEM; - continue; - } - } + /* dirty xattr represents if the file/dir needs heal. Unless all the + * copies are healed, don't erase it */ + if (EC_COUNT(sources, ec->nodes) + EC_COUNT(healed_sinks, ec->nodes) == + ec->nodes) + erase_dirty = _gf_true; + else + op_ret = -ENOTCONN; + + /* Populate the xattr array */ + for (i = 0; i < ec->nodes; i++) { + if (!sources[i] && !healed_sinks[i]) + continue; + versions_xattr = GF_CALLOC(EC_VERSION_SIZE, sizeof(*versions_xattr), + gf_common_mt_pointer); + if (!versions_xattr) { + op_ret = -ENOMEM; + continue; + } + + versions_xattr[type] = hton64(versions[source] - versions[i]); + ret = dict_set_bin(xattr[i], EC_XATTR_VERSION, versions_xattr, + (sizeof(*versions_xattr) * EC_VERSION_SIZE)); + if (ret < 0) { + op_ret = -ENOMEM; + continue; + } - if (memcmp (versions_xattr, allzero, - (sizeof(*versions_xattr) * EC_VERSION_SIZE)) == 0) { + if (erase_dirty) { + dirty_xattr = GF_CALLOC(EC_VERSION_SIZE, sizeof(*dirty_xattr), + gf_common_mt_pointer); + if (!dirty_xattr) { + op_ret = -ENOMEM; + continue; + } - if (!erase_dirty) { - continue; - } + dirty_xattr[type] = hton64(-dirty[i]); + ret = dict_set_bin(xattr[i], EC_XATTR_DIRTY, dirty_xattr, + (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); + if (ret < 0) { + op_ret = -ENOMEM; + continue; + } + } - if (memcmp (dirty_xattr, allzero, (sizeof (*dirty_xattr) - * EC_VERSION_SIZE)) == 0) { - continue; - } - } + if (memcmp(versions_xattr, allzero, + (sizeof(*versions_xattr) * EC_VERSION_SIZE)) == 0) { + if (!erase_dirty) { + continue; + } - on[i] = 1; - call_count++; + if (memcmp(dirty_xattr, allzero, + (sizeof(*dirty_xattr) * EC_VERSION_SIZE)) == 0) { + continue; + } } - /* Update the bricks with xattr */ - if (call_count) { - PARALLEL_FOP_ONLIST (ec->xl_list, on, ec->nodes, replies, - frame, ec_wind_xattrop_parallel, - &loc, GF_XATTROP_ADD_ARRAY64, xattr, NULL); - ret = cluster_fop_success_fill (replies, ec->nodes, output); - } + on[i] = 1; + call_count++; + } - if (ret < call_count) { - op_ret = -ENOTCONN; - goto out; - } + /* Update the bricks with xattr */ + if (call_count) { + PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame, + ec_wind_xattrop_parallel, &loc, + GF_XATTROP_ADD_ARRAY64, xattr, NULL); + ret = cluster_fop_success_fill(replies, ec->nodes, output); + } + + if (ret < call_count) { + op_ret = -ENOTCONN; + goto out; + } out: - /* Cleanup */ - if (xattr) { - for (i = 0; i < ec->nodes; i++) { - if (xattr[i]) - dict_unref 
(xattr[i]); - } - GF_FREE (xattr); + /* Cleanup */ + if (xattr) { + for (i = 0; i < ec->nodes; i++) { + if (xattr[i]) + dict_unref(xattr[i]); } - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&loc); - return op_ret; + GF_FREE(xattr); + } + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + return op_ret; } int -ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies, - uint64_t *versions, uint64_t *dirty, - unsigned char *sources, unsigned char *healed_sinks) -{ - uint64_t xattr[EC_VERSION_SIZE] = {0}; - uint64_t max_version = 0; - int same_count = 0; - int max_same_count = 0; - int same_source = -1; - int ret = 0; - int i = 0; - int j = 0; - int *groups = NULL; - struct iatt source_ia = {0}; - struct iatt child_ia = {0}; - - groups = alloca0 (ec->nodes * sizeof(*groups)); - for (i = 0; i < ec->nodes; i++) - groups[i] = -1; - - for (i = 0; i < ec->nodes; i++) { - if (!replies[i].valid) - continue; - if (replies[i].op_ret < 0) - continue; - ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_VERSION, - xattr, EC_VERSION_SIZE); - if (ret == 0) { - versions[i] = xattr[EC_METADATA_TXN]; - } - - memset (xattr, 0, sizeof (xattr)); - ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_DIRTY, - xattr, EC_VERSION_SIZE); - if (ret == 0) { - dirty[i] = xattr[EC_METADATA_TXN]; - } - if (groups[i] >= 0) /*Already part of group*/ - continue; - groups[i] = i; - same_count = 1; - source_ia = replies[i].stat; - for (j = i + 1; j < ec->nodes; j++) { - if (!replies[j].valid || replies[j].op_ret < 0) - continue; - child_ia = replies[j].stat; - if (!IA_EQUAL(source_ia, child_ia, gfid) || - !IA_EQUAL(source_ia, child_ia, type) || - !IA_EQUAL(source_ia, child_ia, prot) || - !IA_EQUAL(source_ia, child_ia, uid) || - !IA_EQUAL(source_ia, child_ia, gid)) - continue; - if (!are_dicts_equal(replies[i].xdata, replies[j].xdata, - ec_sh_key_match, NULL)) - continue; - groups[j] = i; - same_count++; - } - - if (max_same_count < same_count) { - max_same_count = same_count; - same_source = i; - } +ec_heal_metadata_find_direction(ec_t *ec, default_args_cbk_t *replies, + uint64_t *versions, uint64_t *dirty, + unsigned char *sources, + unsigned char *healed_sinks) +{ + uint64_t xattr[EC_VERSION_SIZE] = {0}; + uint64_t max_version = 0; + int same_count = 0; + int max_same_count = 0; + int same_source = -1; + int ret = 0; + int i = 0; + int j = 0; + int *groups = NULL; + struct iatt source_ia = {0}; + struct iatt child_ia = {0}; + + groups = alloca0(ec->nodes * sizeof(*groups)); + for (i = 0; i < ec->nodes; i++) + groups[i] = -1; + + for (i = 0; i < ec->nodes; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret < 0) + continue; + ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_VERSION, xattr, + EC_VERSION_SIZE); + if (ret == 0) { + versions[i] = xattr[EC_METADATA_TXN]; } - if (max_same_count < ec->fragments) { - ret = -EIO; - goto out; + memset(xattr, 0, sizeof(xattr)); + ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_DIRTY, xattr, + EC_VERSION_SIZE); + if (ret == 0) { + dirty[i] = xattr[EC_METADATA_TXN]; + } + if (groups[i] >= 0) /*Already part of group*/ + continue; + groups[i] = i; + same_count = 1; + source_ia = replies[i].stat; + for (j = i + 1; j < ec->nodes; j++) { + if (!replies[j].valid || replies[j].op_ret < 0) + continue; + child_ia = replies[j].stat; + if (!IA_EQUAL(source_ia, child_ia, gfid) || + !IA_EQUAL(source_ia, child_ia, type) || + !IA_EQUAL(source_ia, child_ia, prot) || + !IA_EQUAL(source_ia, child_ia, uid) || + !IA_EQUAL(source_ia, child_ia, gid)) + 
continue; + if (!are_dicts_equal(replies[i].xdata, replies[j].xdata, + ec_sh_key_match, NULL)) + continue; + groups[j] = i; + same_count++; + } + + if (max_same_count < same_count) { + max_same_count = same_count; + same_source = i; } + } - for (i = 0; i < ec->nodes; i++) { - if (groups[i] == groups[same_source]) - sources[i] = 1; - else if (replies[i].valid && replies[i].op_ret >= 0) - healed_sinks[i] = 1; - } - for (i = 0; i < ec->nodes; i++) { - if (sources[i] && (versions[i] > max_version)) { - same_source = i; - max_version = versions[i]; - } + if (max_same_count < ec->fragments) { + ret = -EIO; + goto out; + } + + for (i = 0; i < ec->nodes; i++) { + if (groups[i] == groups[same_source]) + sources[i] = 1; + else if (replies[i].valid && replies[i].op_ret >= 0) + healed_sinks[i] = 1; + } + for (i = 0; i < ec->nodes; i++) { + if (sources[i] && (versions[i] > max_version)) { + same_source = i; + max_version = versions[i]; } - ret = same_source; + } + ret = same_source; out: - return ret; + return ret; } int -__ec_heal_metadata_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *locked_on, default_args_cbk_t *replies, - uint64_t *versions, uint64_t *dirty, unsigned char *sources, - unsigned char *healed_sinks) -{ - loc_t loc = {0}; - unsigned char *output = NULL; - unsigned char *lookup_on = NULL; - int ret = 0; - int source = 0; - default_args_cbk_t *greplies = NULL; - int i = 0; - EC_REPLIES_ALLOC (greplies, ec->nodes); - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - output = alloca0 (ec->nodes); - lookup_on = alloca0 (ec->nodes); - ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies, - output, frame, ec->xl, &loc, NULL); - if (ret <= ec->fragments) { - ret = -ENOTCONN; - goto out; - } +__ec_heal_metadata_prepare(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *locked_on, + default_args_cbk_t *replies, uint64_t *versions, + uint64_t *dirty, unsigned char *sources, + unsigned char *healed_sinks) +{ + loc_t loc = {0}; + unsigned char *output = NULL; + unsigned char *lookup_on = NULL; + int ret = 0; + int source = 0; + default_args_cbk_t *greplies = NULL; + int i = 0; + EC_REPLIES_ALLOC(greplies, ec->nodes); + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + output = alloca0(ec->nodes); + lookup_on = alloca0(ec->nodes); + ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output, + frame, ec->xl, &loc, NULL); + if (ret <= ec->fragments) { + ret = -ENOTCONN; + goto out; + } - memcpy (lookup_on, output, ec->nodes); - /*Use getxattr to get the filtered xattrs which filter internal xattrs*/ - ret = cluster_getxattr (ec->xl_list, lookup_on, ec->nodes, greplies, - output, frame, ec->xl, &loc, NULL, NULL); - for (i = 0; i < ec->nodes; i++) { - if (lookup_on[i] && !output[i]) { - replies[i].valid = 0; - continue; - } - if (replies[i].xdata) { - dict_unref (replies[i].xdata); - replies[i].xdata = NULL; - if (greplies[i].xattr) - replies[i].xdata = dict_ref (greplies[i].xattr); - } + memcpy(lookup_on, output, ec->nodes); + /*Use getxattr to get the filtered xattrs which filter internal xattrs*/ + ret = cluster_getxattr(ec->xl_list, lookup_on, ec->nodes, greplies, output, + frame, ec->xl, &loc, NULL, NULL); + for (i = 0; i < ec->nodes; i++) { + if (lookup_on[i] && !output[i]) { + replies[i].valid = 0; + continue; + } + if (replies[i].xdata) { + dict_unref(replies[i].xdata); + replies[i].xdata = NULL; + if (greplies[i].xattr) + replies[i].xdata = dict_ref(greplies[i].xattr); } + } - 
source = ec_heal_metadata_find_direction (ec, replies, versions, - dirty, sources, healed_sinks); - if (source < 0) { - ret = -EIO; - goto out; - } - ret = source; + source = ec_heal_metadata_find_direction(ec, replies, versions, dirty, + sources, healed_sinks); + if (source < 0) { + ret = -EIO; + goto out; + } + ret = source; out: - cluster_replies_wipe (greplies, ec->nodes); - loc_wipe (&loc); - return ret; + cluster_replies_wipe(greplies, ec->nodes); + loc_wipe(&loc); + return ret; } /* Metadata heal */ int -__ec_removexattr_sinks (call_frame_t *frame, ec_t *ec, inode_t *inode, - int source, unsigned char *sources, - unsigned char *healed_sinks, - default_args_cbk_t *replies) -{ - int i = 0; - int ret = 0; - loc_t loc = {0}; - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - for (i = 0; i < ec->nodes; i++) { - if (i == source) - continue; - if (!sources[i] && !healed_sinks[i]) - continue; - ret = dict_foreach (replies[i].xdata, ec_heal_xattr_clean, - replies[source].xdata); - if (ret < 0) { - sources[i] = 0; - healed_sinks[i] = 0; - continue; - } - - if (replies[i].xdata->count == 0) { - continue; - } else if (sources[i]) { - /* This can happen if setxattr/removexattr succeeds on - * the bricks but fails to update the version. This - * will make sure that the xattrs are made equal after - * heal*/ - sources[i] = 0; - healed_sinks[i] = 1; - } +__ec_removexattr_sinks(call_frame_t *frame, ec_t *ec, inode_t *inode, + int source, unsigned char *sources, + unsigned char *healed_sinks, default_args_cbk_t *replies) +{ + int i = 0; + int ret = 0; + loc_t loc = {0}; + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + for (i = 0; i < ec->nodes; i++) { + if (i == source) + continue; + if (!sources[i] && !healed_sinks[i]) + continue; + ret = dict_foreach(replies[i].xdata, ec_heal_xattr_clean, + replies[source].xdata); + if (ret < 0) { + sources[i] = 0; + healed_sinks[i] = 0; + continue; + } - ret = syncop_removexattr (ec->xl_list[i], &loc, "", - replies[i].xdata, NULL); - if (ret < 0) - healed_sinks[i] = 0; + if (replies[i].xdata->count == 0) { + continue; + } else if (sources[i]) { + /* This can happen if setxattr/removexattr succeeds on + * the bricks but fails to update the version. 
This + * will make sure that the xattrs are made equal after + * heal*/ + sources[i] = 0; + healed_sinks[i] = 1; } - loc_wipe (&loc); - if (EC_COUNT (healed_sinks, ec->nodes) == 0) - return -ENOTCONN; - return 0; + ret = syncop_removexattr(ec->xl_list[i], &loc, "", replies[i].xdata, + NULL); + if (ret < 0) + healed_sinks[i] = 0; + } + + loc_wipe(&loc); + if (EC_COUNT(healed_sinks, ec->nodes) == 0) + return -ENOTCONN; + return 0; } int -__ec_heal_metadata (call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *locked_on, unsigned char *sources, - unsigned char *healed_sinks) -{ - loc_t loc = {0}; - int ret = 0; - int source = 0; - default_args_cbk_t *replies = NULL; - default_args_cbk_t *sreplies = NULL; - uint64_t *versions = NULL; - uint64_t *dirty = NULL; - unsigned char *output = NULL; - dict_t *source_dict = NULL; - struct iatt source_buf = {0}; - - EC_REPLIES_ALLOC (replies, ec->nodes); - EC_REPLIES_ALLOC (sreplies, ec->nodes); - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - output = alloca0 (ec->nodes); - versions = alloca0 (ec->nodes * sizeof (*versions)); - dirty = alloca0 (ec->nodes * sizeof (*dirty)); - source = __ec_heal_metadata_prepare (frame, ec, inode, locked_on, replies, - versions, dirty, sources, healed_sinks); - if (source < 0) { - ret = -EIO; - goto out; - } +__ec_heal_metadata(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *locked_on, unsigned char *sources, + unsigned char *healed_sinks) +{ + loc_t loc = {0}; + int ret = 0; + int source = 0; + default_args_cbk_t *replies = NULL; + default_args_cbk_t *sreplies = NULL; + uint64_t *versions = NULL; + uint64_t *dirty = NULL; + unsigned char *output = NULL; + dict_t *source_dict = NULL; + struct iatt source_buf = {0}; + + EC_REPLIES_ALLOC(replies, ec->nodes); + EC_REPLIES_ALLOC(sreplies, ec->nodes); + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + output = alloca0(ec->nodes); + versions = alloca0(ec->nodes * sizeof(*versions)); + dirty = alloca0(ec->nodes * sizeof(*dirty)); + source = __ec_heal_metadata_prepare(frame, ec, inode, locked_on, replies, + versions, dirty, sources, healed_sinks); + if (source < 0) { + ret = -EIO; + goto out; + } - if ((EC_COUNT (sources, ec->nodes) == ec->nodes) || - (EC_COUNT (healed_sinks, ec->nodes) == 0)) { - ret = 0; - goto erase_dirty; - } + if ((EC_COUNT(sources, ec->nodes) == ec->nodes) || + (EC_COUNT(healed_sinks, ec->nodes) == 0)) { + ret = 0; + goto erase_dirty; + } - source_buf = replies[source].stat; - ret = cluster_setattr (ec->xl_list, healed_sinks, ec->nodes, sreplies, - output, frame, ec->xl, &loc, - &source_buf, GF_SET_ATTR_MODE | - GF_SET_ATTR_UID | GF_SET_ATTR_GID, NULL); - /*In case the operation fails on some of the subvols*/ - memcpy (healed_sinks, output, ec->nodes); - if (EC_COUNT (healed_sinks, ec->nodes) == 0) { - ret = -ENOTCONN; - goto out; - } + source_buf = replies[source].stat; + ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, sreplies, + output, frame, ec->xl, &loc, &source_buf, + GF_SET_ATTR_MODE | GF_SET_ATTR_UID | GF_SET_ATTR_GID, + NULL); + /*In case the operation fails on some of the subvols*/ + memcpy(healed_sinks, output, ec->nodes); + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = -ENOTCONN; + goto out; + } - ret = __ec_removexattr_sinks (frame, ec, inode, source, sources, - healed_sinks, replies); - if (ret < 0) - goto out; + ret = __ec_removexattr_sinks(frame, ec, inode, source, sources, + healed_sinks, replies); + if (ret < 0) + goto out; - source_dict = 
dict_ref (replies[source].xdata); - if (dict_foreach_match (source_dict, ec_ignorable_key_match, NULL, - dict_remove_foreach_fn, NULL) == -1) { - ret = -ENOMEM; - goto out; - } + source_dict = dict_ref(replies[source].xdata); + if (dict_foreach_match(source_dict, ec_ignorable_key_match, NULL, + dict_remove_foreach_fn, NULL) == -1) { + ret = -ENOMEM; + goto out; + } - ret = cluster_setxattr (ec->xl_list, healed_sinks, ec->nodes, - replies, output, frame, ec->xl, &loc, - source_dict, 0, NULL); + ret = cluster_setxattr(ec->xl_list, healed_sinks, ec->nodes, replies, + output, frame, ec->xl, &loc, source_dict, 0, NULL); - EC_INTERSECT (healed_sinks, healed_sinks, output, ec->nodes); - if (EC_COUNT (healed_sinks, ec->nodes) == 0) { - ret = -ENOTCONN; - goto out; - } + EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes); + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = -ENOTCONN; + goto out; + } erase_dirty: - ret = ec_adjust_versions (frame, ec, EC_METADATA_TXN, inode, source, - sources, healed_sinks, versions, dirty); + ret = ec_adjust_versions(frame, ec, EC_METADATA_TXN, inode, source, sources, + healed_sinks, versions, dirty); out: - if (source_dict) - dict_unref (source_dict); + if (source_dict) + dict_unref(source_dict); - loc_wipe (&loc); - cluster_replies_wipe (replies, ec->nodes); - cluster_replies_wipe (sreplies, ec->nodes); - return ret; + loc_wipe(&loc); + cluster_replies_wipe(replies, ec->nodes); + cluster_replies_wipe(sreplies, ec->nodes); + return ret; } int -ec_heal_metadata (call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *sources, unsigned char *healed_sinks) -{ - unsigned char *locked_on = NULL; - unsigned char *up_subvols = NULL; - unsigned char *output = NULL; - int ret = 0; - default_args_cbk_t *replies = NULL; - - EC_REPLIES_ALLOC (replies, ec->nodes); - locked_on = alloca0(ec->nodes); - output = alloca0(ec->nodes); - up_subvols = alloca0(ec->nodes); - ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); - ret = cluster_inodelk (ec->xl_list, up_subvols, ec->nodes, replies, - locked_on, frame, ec->xl, ec->xl->name, inode, 0, - 0); - { - if (ret <= ec->fragments) { - gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal " - "as only %d number of subvolumes could " - "be locked", uuid_utoa (inode->gfid), ret); - ret = -ENOTCONN; - goto unlock; - } - ret = __ec_heal_metadata (frame, ec, inode, locked_on, sources, - healed_sinks); - } +ec_heal_metadata(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *sources, unsigned char *healed_sinks) +{ + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; + unsigned char *output = NULL; + int ret = 0; + default_args_cbk_t *replies = NULL; + + EC_REPLIES_ALLOC(replies, ec->nodes); + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + up_subvols = alloca0(ec->nodes); + ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes); + ret = cluster_inodelk(ec->xl_list, up_subvols, ec->nodes, replies, + locked_on, frame, ec->xl, ec->xl->name, inode, 0, 0); + { + if (ret <= ec->fragments) { + gf_msg_debug(ec->xl->name, 0, + "%s: Skipping heal " + "as only %d number of subvolumes could " + "be locked", + uuid_utoa(inode->gfid), ret); + ret = -ENOTCONN; + goto unlock; + } + ret = __ec_heal_metadata(frame, ec, inode, locked_on, sources, + healed_sinks); + } unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, - frame, ec->xl, ec->xl->name, inode, 0, 0); - cluster_replies_wipe (replies, ec->nodes); - return ret; + cluster_uninodelk(ec->xl_list, 
locked_on, ec->nodes, replies, output, frame, + ec->xl, ec->xl->name, inode, 0, 0); + cluster_replies_wipe(replies, ec->nodes); + return ret; } /*entry heal*/ int -__ec_heal_entry_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *locked_on, uint64_t *versions, - uint64_t *dirty, unsigned char *sources, - unsigned char *healed_sinks) -{ - loc_t loc = {0}; - int source = 0; - int ret = 0; - default_args_cbk_t *replies = NULL; - unsigned char *output = NULL; - dict_t *xdata = NULL; - - EC_REPLIES_ALLOC (replies, ec->nodes); - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - xdata = dict_new (); - if (!xdata) { - ret = -ENOMEM; - goto out; - } +__ec_heal_entry_prepare(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *locked_on, uint64_t *versions, + uint64_t *dirty, unsigned char *sources, + unsigned char *healed_sinks) +{ + loc_t loc = {0}; + int source = 0; + int ret = 0; + default_args_cbk_t *replies = NULL; + unsigned char *output = NULL; + dict_t *xdata = NULL; + + EC_REPLIES_ALLOC(replies, ec->nodes); + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + xdata = dict_new(); + if (!xdata) { + ret = -ENOMEM; + goto out; + } - if (dict_set_uint64(xdata, EC_XATTR_VERSION, 0) || - dict_set_uint64(xdata, EC_XATTR_DIRTY, 0)) { - ret = -ENOMEM; - goto out; - } + if (dict_set_uint64(xdata, EC_XATTR_VERSION, 0) || + dict_set_uint64(xdata, EC_XATTR_DIRTY, 0)) { + ret = -ENOMEM; + goto out; + } - output = alloca0 (ec->nodes); - ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies, - output, frame, ec->xl, &loc, xdata); - if (ret <= ec->fragments) { - ret = -ENOTCONN; - goto out; - } + output = alloca0(ec->nodes); + ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output, + frame, ec->xl, &loc, xdata); + if (ret <= ec->fragments) { + ret = -ENOTCONN; + goto out; + } - source = ec_heal_entry_find_direction (ec, replies, versions, - dirty, sources, healed_sinks); - if (source < 0) { - ret = -EIO; - goto out; - } - ret = source; + source = ec_heal_entry_find_direction(ec, replies, versions, dirty, sources, + healed_sinks); + if (source < 0) { + ret = -EIO; + goto out; + } + ret = source; out: - if (xdata) - dict_unref (xdata); - loc_wipe (&loc); - cluster_replies_wipe (replies, ec->nodes); - return ret; + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + cluster_replies_wipe(replies, ec->nodes); + return ret; } int32_t -ec_set_new_entry_dirty (ec_t *ec, loc_t *loc, struct iatt *ia, - call_frame_t *frame, xlator_t *this, unsigned char *on) -{ - dict_t *xattr = NULL; - int32_t ret = -1; - default_args_cbk_t *replies = NULL; - unsigned char *output = NULL; - uint64_t dirty[EC_VERSION_SIZE] = {1, 1}; - loc_t newloc = {0}; - - /*Symlinks don't have any data to be healed*/ - if (ia->ia_type == IA_IFLNK) - dirty[EC_DATA_TXN] = 0; - - newloc.inode = inode_ref (loc->inode); - gf_uuid_copy (newloc.gfid, ia->ia_gfid); - EC_REPLIES_ALLOC (replies, ec->nodes); - output = alloca0 (ec->nodes); - xattr = dict_new(); - if (!xattr) { - ret = -ENOMEM; - goto out; - } +ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia, + call_frame_t *frame, xlator_t *this, unsigned char *on) +{ + dict_t *xattr = NULL; + int32_t ret = -1; + default_args_cbk_t *replies = NULL; + unsigned char *output = NULL; + uint64_t dirty[EC_VERSION_SIZE] = {1, 1}; + loc_t newloc = {0}; + + /*Symlinks don't have any data to be healed*/ + if (ia->ia_type == IA_IFLNK) + dirty[EC_DATA_TXN] = 0; + + newloc.inode = 
inode_ref(loc->inode); + gf_uuid_copy(newloc.gfid, ia->ia_gfid); + EC_REPLIES_ALLOC(replies, ec->nodes); + output = alloca0(ec->nodes); + xattr = dict_new(); + if (!xattr) { + ret = -ENOMEM; + goto out; + } - ret = ec_dict_set_array (xattr, EC_XATTR_DIRTY, dirty, - EC_VERSION_SIZE); - if (ret) - goto out; + ret = ec_dict_set_array(xattr, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); + if (ret) + goto out; - ret = cluster_xattrop (ec->xl_list, on, ec->nodes, replies, output, - frame, ec->xl, &newloc, - GF_XATTROP_ADD_ARRAY64, xattr, NULL); + ret = cluster_xattrop(ec->xl_list, on, ec->nodes, replies, output, frame, + ec->xl, &newloc, GF_XATTROP_ADD_ARRAY64, xattr, NULL); - if (ret < ec->fragments) { - ret = -ENOTCONN; - goto out; - } + if (ret < ec->fragments) { + ret = -ENOTCONN; + goto out; + } out: - if (xattr) - dict_unref (xattr); - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&newloc); - return ret; + if (xattr) + dict_unref(xattr); + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&newloc); + return ret; } /*Name heal*/ int -ec_delete_stale_name (dict_t *gfid_db, char *key, data_t *d, void *data) -{ - struct ec_name_data *name_data = data; - struct iatt *ia = NULL; - ec_t *ec = NULL; - loc_t loc = {0}; - unsigned char *same = data_to_bin (d); - default_args_cbk_t *replies = NULL; - unsigned char *output = NULL; - int ret = 0; - int estale_count = 0; - int i = 0; - call_frame_t *frame = name_data->frame; - - ec = name_data->frame->this->private; - EC_REPLIES_ALLOC (replies, ec->nodes); - if (EC_COUNT (same, ec->nodes) >= ec->fragments) { - ret = 0; - goto out; - } +ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data) +{ + struct ec_name_data *name_data = data; + struct iatt *ia = NULL; + ec_t *ec = NULL; + loc_t loc = {0}; + unsigned char *same = data_to_bin(d); + default_args_cbk_t *replies = NULL; + unsigned char *output = NULL; + int ret = 0; + int estale_count = 0; + int i = 0; + call_frame_t *frame = name_data->frame; + + ec = name_data->frame->this->private; + EC_REPLIES_ALLOC(replies, ec->nodes); + if (EC_COUNT(same, ec->nodes) >= ec->fragments) { + ret = 0; + goto out; + } - loc.inode = inode_new (name_data->parent->table); - if (!loc.inode) { - ret = -ENOMEM; - goto out; + loc.inode = inode_new(name_data->parent->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + gf_uuid_parse(key, loc.gfid); + output = alloca0(ec->nodes); + ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes, + replies, output, name_data->frame, ec->xl, &loc, NULL); + + for (i = 0; i < ec->nodes; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret == -1) { + if (replies[i].op_errno == ESTALE || replies[i].op_errno == ENOENT) + estale_count++; + else + name_data->participants[i] = 0; } - gf_uuid_parse (key, loc.gfid); - output = alloca0(ec->nodes); - ret = cluster_lookup (ec->xl_list, name_data->participants, ec->nodes, - replies, output, name_data->frame, ec->xl, &loc, - NULL); + } - for (i = 0; i < ec->nodes; i++) { - if (!replies[i].valid) - continue; - if (replies[i].op_ret == -1) { - if (replies[i].op_errno == ESTALE || - replies[i].op_errno == ENOENT) - estale_count++; - else - name_data->participants[i] = 0; - } - } + if (estale_count <= ec->redundancy) { + /* We have at least ec->fragments number of fragments, so the + * file is recoverable, so don't delete it*/ - if (estale_count <= ec->redundancy) { - /* We have at least ec->fragments number of fragments, so the - * file is recoverable, so don't delete it*/ + /* Please note 
that the lookup call above could fail with + * ENOTCONN on all subvoumes and still this branch will be + * true, but in those cases conservatively we decide to not + * delete the file until we are sure*/ + ret = 0; + goto out; + } - /* Please note that the lookup call above could fail with - * ENOTCONN on all subvoumes and still this branch will be - * true, but in those cases conservatively we decide to not - * delete the file until we are sure*/ - ret = 0; - goto out; + /*Noway to recover, delete the name*/ + loc_wipe(&loc); + loc.parent = inode_ref(name_data->parent); + gf_uuid_copy(loc.pargfid, loc.parent->gfid); + loc.name = name_data->name; + for (i = 0; i < ec->nodes; i++) { + if (same[i] && replies[i].valid && (replies[i].op_ret == 0)) { + ia = &replies[i].stat; + break; } + } - /*Noway to recover, delete the name*/ - loc_wipe (&loc); - loc.parent = inode_ref (name_data->parent); - gf_uuid_copy (loc.pargfid, loc.parent->gfid); - loc.name = name_data->name; - for (i = 0; i < ec->nodes; i++) { - if (same[i] && replies[i].valid && (replies[i].op_ret == 0)) { - ia = &replies[i].stat; - break; - } - } + if (!ia) { + ret = -ENOTCONN; + goto out; + } - if (!ia) { - ret = -ENOTCONN; - goto out; - } + if (IA_ISDIR(ia->ia_type)) { + ret = cluster_rmdir(ec->xl_list, same, ec->nodes, replies, output, + frame, ec->xl, &loc, 1, NULL); + gf_msg_debug(ec->xl->name, 0, + "cluster rmdir succeeded on %d " + "nodes", + ret); + } else { + ret = cluster_unlink(ec->xl_list, same, ec->nodes, replies, output, + frame, ec->xl, &loc, 0, NULL); + gf_msg_debug(ec->xl->name, 0, + "cluster unlink succeeded on %d " + "nodes", + ret); + } - if (IA_ISDIR (ia->ia_type)) { - ret = cluster_rmdir (ec->xl_list, same, ec->nodes, replies, - output, frame, ec->xl, &loc, 1, NULL); - gf_msg_debug (ec->xl->name, 0, "cluster rmdir succeeded on %d " - "nodes", ret); + for (i = 0; i < ec->nodes; i++) { + if (output[i]) { + same[i] = 0; + name_data->enoent[i] = 1; } else { - ret = cluster_unlink (ec->xl_list, same, ec->nodes, replies, - output, frame, ec->xl, &loc, 0, NULL); - gf_msg_debug (ec->xl->name, 0, "cluster unlink succeeded on %d " - "nodes", ret); - } - - for (i = 0; i < ec->nodes; i++) { - if (output[i]) { - same[i] = 0; - name_data->enoent[i] = 1; - } else { - /*op failed*/ - if (same[i]) - name_data->participants[i] = 0; - } + /*op failed*/ + if (same[i]) + name_data->participants[i] = 0; } - ret = 0; - /*This will help in making decisions about creating names*/ - dict_del (gfid_db, key); + } + ret = 0; + /*This will help in making decisions about creating names*/ + dict_del(gfid_db, key); out: - if (ret < 0) { - gf_msg_debug (ec->xl->name, 0, "%s/%s: heal failed %s", - uuid_utoa (name_data->parent->gfid), name_data->name, - strerror (-ret)); - } - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&loc); - return ret; + if (ret < 0) { + gf_msg_debug(ec->xl->name, 0, "%s/%s: heal failed %s", + uuid_utoa(name_data->parent->gfid), name_data->name, + strerror(-ret)); + } + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + return ret; } int -ec_delete_stale_names (call_frame_t *frame, ec_t *ec, inode_t *parent, - char *name, default_args_cbk_t *replies, dict_t *gfid_db, - unsigned char *enoent, unsigned char *gfidless, - unsigned char *participants) +ec_delete_stale_names(call_frame_t *frame, ec_t *ec, inode_t *parent, + char *name, default_args_cbk_t *replies, dict_t *gfid_db, + unsigned char *enoent, unsigned char *gfidless, + unsigned char *participants) { - struct ec_name_data name_data = {0}; + 
struct ec_name_data name_data = {0}; - name_data.enoent = enoent; - name_data.gfidless = gfidless; - name_data.participants = participants; - name_data.name = name; - name_data.parent = parent; - name_data.frame = frame; - name_data.replies = replies; - return dict_foreach (gfid_db, ec_delete_stale_name, &name_data); + name_data.enoent = enoent; + name_data.gfidless = gfidless; + name_data.participants = participants; + name_data.name = name; + name_data.parent = parent; + name_data.frame = frame; + name_data.replies = replies; + return dict_foreach(gfid_db, ec_delete_stale_name, &name_data); } int -_assign_same (dict_t *dict, char *key, data_t *value, void *data) +_assign_same(dict_t *dict, char *key, data_t *value, void *data) { - struct ec_name_data *name_data = data; + struct ec_name_data *name_data = data; - name_data->same = data_to_bin (value); - return 0; + name_data->same = data_to_bin(value); + return 0; } int -ec_create_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, - default_args_cbk_t *lookup_replies, dict_t *gfid_db, - unsigned char *enoent, unsigned char *participants) -{ - int ret = 0; - int i = 0; - struct ec_name_data name_data = {0}; - struct iatt *ia = NULL; - unsigned char *output = 0; - unsigned char *output1 = 0; - unsigned char *on = NULL; - default_args_cbk_t *replies = NULL; - loc_t loc = {0}; - loc_t srcloc = {0}; - unsigned char *link = NULL; - unsigned char *create = NULL; - dict_t *xdata = NULL; - char *linkname = NULL; - ec_config_t config; - /* There should be just one gfid key */ - EC_REPLIES_ALLOC (replies, ec->nodes); - if (gfid_db->count != 1) { - ret = -EINVAL; - goto out; - } - - ret = dict_foreach (gfid_db, _assign_same, &name_data); - if (ret < 0) - goto out; - /*There should at least be one valid success reply with gfid*/ - for (i = 0; i < ec->nodes; i++) - if (name_data.same[i]) - break; +ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + default_args_cbk_t *lookup_replies, dict_t *gfid_db, + unsigned char *enoent, unsigned char *participants) +{ + int ret = 0; + int i = 0; + struct ec_name_data name_data = {0}; + struct iatt *ia = NULL; + unsigned char *output = 0; + unsigned char *output1 = 0; + unsigned char *on = NULL; + default_args_cbk_t *replies = NULL; + loc_t loc = {0}; + loc_t srcloc = {0}; + unsigned char *link = NULL; + unsigned char *create = NULL; + dict_t *xdata = NULL; + char *linkname = NULL; + ec_config_t config; + /* There should be just one gfid key */ + EC_REPLIES_ALLOC(replies, ec->nodes); + if (gfid_db->count != 1) { + ret = -EINVAL; + goto out; + } - if (i == ec->nodes) { - ret = -EINVAL; - goto out; - } + ret = dict_foreach(gfid_db, _assign_same, &name_data); + if (ret < 0) + goto out; + /*There should at least be one valid success reply with gfid*/ + for (i = 0; i < ec->nodes; i++) + if (name_data.same[i]) + break; - ia = &lookup_replies[i].stat; - xdata = dict_new (); - loc.parent = inode_ref (parent); - gf_uuid_copy (loc.pargfid, parent->gfid); - loc.inode = inode_new (parent->table); - if (loc.inode) - srcloc.inode = inode_ref (loc.inode); - gf_uuid_copy (srcloc.gfid, ia->ia_gfid); - if (!loc.inode || !xdata || dict_set_static_bin (xdata, "gfid-req", - ia->ia_gfid, - sizeof (ia->ia_gfid))) { - ret = -ENOMEM; - goto out; - } - loc.name = name; - link = alloca0 (ec->nodes); - create = alloca0 (ec->nodes); - on = alloca0 (ec->nodes); - output = alloca0 (ec->nodes); - output1 = alloca0 (ec->nodes); + if (i == ec->nodes) { + ret = -EINVAL; + goto out; + } - for (i = 0; i < 
ec->nodes; i++) { - if (!lookup_replies[i].valid) - continue; - if (lookup_replies[i].op_ret) - continue; - on[i] = 1; - } - switch (ia->ia_type) { + ia = &lookup_replies[i].stat; + xdata = dict_new(); + loc.parent = inode_ref(parent); + gf_uuid_copy(loc.pargfid, parent->gfid); + loc.inode = inode_new(parent->table); + if (loc.inode) + srcloc.inode = inode_ref(loc.inode); + gf_uuid_copy(srcloc.gfid, ia->ia_gfid); + if (!loc.inode || !xdata || + dict_set_static_bin(xdata, "gfid-req", ia->ia_gfid, + sizeof(ia->ia_gfid))) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + link = alloca0(ec->nodes); + create = alloca0(ec->nodes); + on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + output1 = alloca0(ec->nodes); + + for (i = 0; i < ec->nodes; i++) { + if (!lookup_replies[i].valid) + continue; + if (lookup_replies[i].op_ret) + continue; + on[i] = 1; + } + switch (ia->ia_type) { case IA_IFDIR: - ec_set_new_entry_dirty (ec, &loc, ia, frame, ec->xl, on); - (void) cluster_mkdir (ec->xl_list, enoent, ec->nodes, - replies, output, frame, ec->xl, &loc, - st_mode_from_ia (ia->ia_prot, - ia->ia_type), 0, xdata); - break; + ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on); + (void)cluster_mkdir( + ec->xl_list, enoent, ec->nodes, replies, output, frame, ec->xl, + &loc, st_mode_from_ia(ia->ia_prot, ia->ia_type), 0, xdata); + break; case IA_IFLNK: - /*Check for hard links and create/link*/ - ret = cluster_lookup (ec->xl_list, enoent, ec->nodes, - replies, output, frame, ec->xl, - &srcloc, NULL); - for (i = 0; i < ec->nodes; i++) { - if (output[i]) { - link[i] = 1; - } else { - if (replies[i].op_errno == ENOENT || - replies[i].op_errno == ESTALE) { - create[i] = 1; - } - } + /*Check for hard links and create/link*/ + ret = cluster_lookup(ec->xl_list, enoent, ec->nodes, replies, + output, frame, ec->xl, &srcloc, NULL); + for (i = 0; i < ec->nodes; i++) { + if (output[i]) { + link[i] = 1; + } else { + if (replies[i].op_errno == ENOENT || + replies[i].op_errno == ESTALE) { + create[i] = 1; + } } - - if (EC_COUNT (link, ec->nodes)) { - cluster_link (ec->xl_list, link, ec->nodes, - replies, output1, frame, ec->xl, - &srcloc, &loc, NULL); + } + + if (EC_COUNT(link, ec->nodes)) { + cluster_link(ec->xl_list, link, ec->nodes, replies, output1, + frame, ec->xl, &srcloc, &loc, NULL); + } + + if (EC_COUNT(create, ec->nodes)) { + cluster_readlink(ec->xl_list, name_data.same, ec->nodes, + replies, output, frame, ec->xl, &srcloc, 4096, + NULL); + if (EC_COUNT(output, ec->nodes) == 0) { + ret = -ENOTCONN; + goto out; } - if (EC_COUNT (create, ec->nodes)) { - cluster_readlink (ec->xl_list, name_data.same, - ec->nodes, replies, output, - frame, ec->xl, &srcloc, 4096, - NULL); - if (EC_COUNT (output, ec->nodes) == 0) { - ret = -ENOTCONN; - goto out; - } - - for (i = 0; i < ec->nodes; i++) { - if (output[i]) - break; - } - linkname = alloca0 (strlen(replies[i].buf) + 1); - strcpy (linkname, replies[i].buf); - ec_set_new_entry_dirty (ec, &loc, ia, frame, - ec->xl, on); - cluster_symlink (ec->xl_list, create, ec->nodes, - replies, output, frame, ec->xl, - linkname, &loc, 0, xdata); + for (i = 0; i < ec->nodes; i++) { + if (output[i]) + break; } - for (i = 0; i < ec->nodes; i++) - if (output1[i]) - output[i] = 1; - break; + linkname = alloca0(strlen(replies[i].buf) + 1); + strcpy(linkname, replies[i].buf); + ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on); + cluster_symlink(ec->xl_list, create, ec->nodes, replies, output, + frame, ec->xl, linkname, &loc, 0, xdata); + } + for (i = 0; i < ec->nodes; i++) + 
if (output1[i]) + output[i] = 1; + break; case IA_IFREG: - ec_set_new_entry_dirty (ec, &loc, ia, - frame, ec->xl, on); - config.version = EC_CONFIG_VERSION; - config.algorithm = EC_CONFIG_ALGORITHM; - config.gf_word_size = EC_GF_BITS; - config.bricks = ec->nodes; - config.redundancy = ec->redundancy; - config.chunk_size = EC_METHOD_CHUNK_SIZE; - - ret = ec_dict_set_config(xdata, EC_XATTR_CONFIG, &config); - if (ret != 0) { - goto out; - } + ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on); + config.version = EC_CONFIG_VERSION; + config.algorithm = EC_CONFIG_ALGORITHM; + config.gf_word_size = EC_GF_BITS; + config.bricks = ec->nodes; + config.redundancy = ec->redundancy; + config.chunk_size = EC_METHOD_CHUNK_SIZE; + + ret = ec_dict_set_config(xdata, EC_XATTR_CONFIG, &config); + if (ret != 0) { + goto out; + } - /* Fall through */ + /* Fall through */ default: - ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, - 1); - if (ret) - goto out; - ret = cluster_mknod (ec->xl_list, enoent, ec->nodes, - replies, output, frame, ec->xl, - &loc, st_mode_from_ia (ia->ia_prot, - ia->ia_type), - makedev(ia_major(ia->ia_rdev), ia_minor(ia->ia_rdev)), - 0, xdata); - break; - } + ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); + if (ret) + goto out; + ret = cluster_mknod( + ec->xl_list, enoent, ec->nodes, replies, output, frame, ec->xl, + &loc, st_mode_from_ia(ia->ia_prot, ia->ia_type), + makedev(ia_major(ia->ia_rdev), ia_minor(ia->ia_rdev)), 0, + xdata); + break; + } - for (i = 0; i < ec->nodes; i++) { - if (enoent[i] && !output[i]) - participants[i] = 0; - } + for (i = 0; i < ec->nodes; i++) { + if (enoent[i] && !output[i]) + participants[i] = 0; + } - ret = 0; + ret = 0; out: - if (ret < 0) - gf_msg_debug (ec->xl->name, 0, "%s/%s: heal failed %s", - uuid_utoa (parent->gfid), name, strerror (-ret)); - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&loc); - loc_wipe (&srcloc); - if (xdata) - dict_unref (xdata); - return ret; + if (ret < 0) + gf_msg_debug(ec->xl->name, 0, "%s/%s: heal failed %s", + uuid_utoa(parent->gfid), name, strerror(-ret)); + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + loc_wipe(&srcloc); + if (xdata) + dict_unref(xdata); + return ret; } int -__ec_heal_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, - unsigned char *participants) -{ - unsigned char *output = NULL; - unsigned char *enoent = NULL; - default_args_cbk_t *replies = NULL; - dict_t *xdata = NULL; - dict_t *gfid_db = NULL; - int ret = 0; - loc_t loc = {0}; - int i = 0; - struct iatt *ia = NULL; - char gfid[64] = {0}; - unsigned char *same = NULL; - unsigned char *gfidless = NULL; - - EC_REPLIES_ALLOC (replies, ec->nodes); - loc.parent = inode_ref (parent); - loc.inode = inode_new (parent->table); - gf_uuid_copy (loc.pargfid, parent->gfid); - loc.name = name; - xdata = dict_new (); - gfid_db = dict_new (); - if (!xdata || !gfid_db || !loc.inode) { - ret = -ENOMEM; - goto out; - } +__ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + unsigned char *participants) +{ + unsigned char *output = NULL; + unsigned char *enoent = NULL; + default_args_cbk_t *replies = NULL; + dict_t *xdata = NULL; + dict_t *gfid_db = NULL; + int ret = 0; + loc_t loc = {0}; + int i = 0; + struct iatt *ia = NULL; + char gfid[64] = {0}; + unsigned char *same = NULL; + unsigned char *gfidless = NULL; + + EC_REPLIES_ALLOC(replies, ec->nodes); + loc.parent = inode_ref(parent); + loc.inode = inode_new(parent->table); + gf_uuid_copy(loc.pargfid, parent->gfid); + loc.name = 
name; + xdata = dict_new(); + gfid_db = dict_new(); + if (!xdata || !gfid_db || !loc.inode) { + ret = -ENOMEM; + goto out; + } - ret = dict_set_int32 (xdata, GF_GFIDLESS_LOOKUP, 1); - if (ret) { - ret = -ENOMEM; - goto out; - } + ret = dict_set_int32(xdata, GF_GFIDLESS_LOOKUP, 1); + if (ret) { + ret = -ENOMEM; + goto out; + } - output = alloca0 (ec->nodes); - gfidless = alloca0 (ec->nodes); - enoent = alloca0 (ec->nodes); - ret = cluster_lookup (ec->xl_list, participants, ec->nodes, replies, - output, frame, ec->xl, &loc, NULL); - for (i = 0; i < ec->nodes; i++) { - if (!replies[i].valid) - continue; - - if (replies[i].op_ret == -1) { - /*If ESTALE comes here, that means parent dir is not - * present, nothing to do there, so reset participants - * for that brick*/ - if (replies[i].op_errno == ENOENT) - enoent[i] = 1; - else - participants[i] = 0; - continue; - } - ia = &replies[i].stat; - if (gf_uuid_is_null (ia->ia_gfid)) { - if (IA_ISDIR (ia->ia_type) || ia->ia_size == 0) - gfidless[i] = 1; - else - participants[i] = 0; - } else { - uuid_utoa_r (ia->ia_gfid, gfid); - ret = dict_get_bin (gfid_db, gfid, (void **)&same); - if (ret < 0) { - same = alloca0(ec->nodes); - } - same[i] = 1; - if (ret < 0) { - ret = dict_set_static_bin (gfid_db, gfid, same, - ec->nodes); - } - if (ret < 0) - goto out; - } + output = alloca0(ec->nodes); + gfidless = alloca0(ec->nodes); + enoent = alloca0(ec->nodes); + ret = cluster_lookup(ec->xl_list, participants, ec->nodes, replies, output, + frame, ec->xl, &loc, NULL); + for (i = 0; i < ec->nodes; i++) { + if (!replies[i].valid) + continue; + + if (replies[i].op_ret == -1) { + /*If ESTALE comes here, that means parent dir is not + * present, nothing to do there, so reset participants + * for that brick*/ + if (replies[i].op_errno == ENOENT) + enoent[i] = 1; + else + participants[i] = 0; + continue; + } + ia = &replies[i].stat; + if (gf_uuid_is_null(ia->ia_gfid)) { + if (IA_ISDIR(ia->ia_type) || ia->ia_size == 0) + gfidless[i] = 1; + else + participants[i] = 0; + } else { + uuid_utoa_r(ia->ia_gfid, gfid); + ret = dict_get_bin(gfid_db, gfid, (void **)&same); + if (ret < 0) { + same = alloca0(ec->nodes); + } + same[i] = 1; + if (ret < 0) { + ret = dict_set_static_bin(gfid_db, gfid, same, ec->nodes); + } + if (ret < 0) + goto out; } + } - ret = ec_delete_stale_names (frame, ec, parent, name, replies, gfid_db, - enoent, gfidless, participants); + ret = ec_delete_stale_names(frame, ec, parent, name, replies, gfid_db, + enoent, gfidless, participants); - if (gfid_db->count == 0) { - /* All entries seem to be stale entries and deleted, - * nothing more to do.*/ - goto out; - } + if (gfid_db->count == 0) { + /* All entries seem to be stale entries and deleted, + * nothing more to do.*/ + goto out; + } - if (gfid_db->count > 1) { - gf_msg (ec->xl->name, GF_LOG_INFO, 0, - EC_MSG_HEAL_FAIL, "%s/%s: Not able to heal", - uuid_utoa (parent->gfid), name); - memset (participants, 0, ec->nodes); - goto out; - } + if (gfid_db->count > 1) { + gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, + "%s/%s: Not able to heal", uuid_utoa(parent->gfid), name); + memset(participants, 0, ec->nodes); + goto out; + } - EC_INTERSECT (enoent, enoent, participants, ec->nodes); - if (EC_COUNT (enoent, ec->nodes) == 0) { - ret = 0; - goto out; - } + EC_INTERSECT(enoent, enoent, participants, ec->nodes); + if (EC_COUNT(enoent, ec->nodes) == 0) { + ret = 0; + goto out; + } - ret = ec_create_name (frame, ec, parent, name, replies, gfid_db, enoent, - participants); + ret = ec_create_name(frame, ec, 
parent, name, replies, gfid_db, enoent, + participants); out: - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&loc); - if (xdata) - dict_unref (xdata); - if (gfid_db) - dict_unref (gfid_db); - return ret; + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + if (xdata) + dict_unref(xdata); + if (gfid_db) + dict_unref(gfid_db); + return ret; } int -ec_heal_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, - unsigned char *participants) -{ - int ret = 0; - default_args_cbk_t *replies = NULL; - unsigned char *output = NULL; - unsigned char *locked_on = NULL; - loc_t loc = {0}; - - loc.parent = inode_ref (parent); - loc.name = name; - loc.inode = inode_new (parent->table); - if (!loc.inode) { - ret = -ENOMEM; - goto out; - } +ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + unsigned char *participants) +{ + int ret = 0; + default_args_cbk_t *replies = NULL; + unsigned char *output = NULL; + unsigned char *locked_on = NULL; + loc_t loc = {0}; + + loc.parent = inode_ref(parent); + loc.name = name; + loc.inode = inode_new(parent->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } - EC_REPLIES_ALLOC (replies, ec->nodes); - output = alloca0 (ec->nodes); - locked_on = alloca0 (ec->nodes); - ret = cluster_inodelk (ec->xl_list, participants, ec->nodes, replies, - locked_on, frame, ec->xl, ec->xl->name, parent, - 0, 0); - { - if (ret <= ec->fragments) { - gf_msg_debug (ec->xl->name, 0, "%s/%s: Skipping " - "heal as only %d number of subvolumes could " - "be locked", uuid_utoa (parent->gfid), name, - ret); - ret = -ENOTCONN; - goto unlock; - } - EC_INTERSECT (participants, participants, locked_on, ec->nodes); - ret = __ec_heal_name (frame, ec, parent, name, participants); - } + EC_REPLIES_ALLOC(replies, ec->nodes); + output = alloca0(ec->nodes); + locked_on = alloca0(ec->nodes); + ret = cluster_inodelk(ec->xl_list, participants, ec->nodes, replies, + locked_on, frame, ec->xl, ec->xl->name, parent, 0, 0); + { + if (ret <= ec->fragments) { + gf_msg_debug(ec->xl->name, 0, + "%s/%s: Skipping " + "heal as only %d number of subvolumes could " + "be locked", + uuid_utoa(parent->gfid), name, ret); + ret = -ENOTCONN; + goto unlock; + } + EC_INTERSECT(participants, participants, locked_on, ec->nodes); + ret = __ec_heal_name(frame, ec, parent, name, participants); + } unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, - frame, ec->xl, ec->xl->name, parent, 0, 0); + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, + ec->xl, ec->xl->name, parent, 0, 0); out: - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&loc); - return ret; + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + return ret; } int -ec_name_heal_handler (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, - void *data) -{ - struct ec_name_data *name_data = data; - xlator_t *this = THIS; - ec_t *ec = this->private; - unsigned char *name_on = alloca0 (ec->nodes); - int i = 0; - int ret = 0; - - if (ec->shutdown) { - gf_msg_debug(this->name, 0, "Cancelling directory heal " - "because EC is stopping."); - return -ENOTCONN; - } +ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct ec_name_data *name_data = data; + xlator_t *this = THIS; + ec_t *ec = this->private; + unsigned char *name_on = alloca0(ec->nodes); + int i = 0; + int ret = 0; + + if (ec->shutdown) { + gf_msg_debug(this->name, 0, + "Cancelling directory heal " + "because EC is stopping."); + return 
-ENOTCONN; + } - memcpy (name_on, name_data->participants, ec->nodes); - ret = ec_heal_name (name_data->frame, ec, parent->inode, - entry->d_name, name_on); + memcpy(name_on, name_data->participants, ec->nodes); + ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name, + name_on); - if (ret < 0) - memset (name_on, 0, ec->nodes); + if (ret < 0) + memset(name_on, 0, ec->nodes); - for (i = 0; i < ec->nodes; i++) - if (name_data->participants[i] && !name_on[i]) - name_data->failed_on[i] = 1; - return 0; + for (i = 0; i < ec->nodes; i++) + if (name_data->participants[i] && !name_on[i]) + name_data->failed_on[i] = 1; + return 0; } int -ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *participants) +ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *participants) { - int i = 0; - int j = 0; - loc_t loc = {0}; - struct ec_name_data name_data = {0}; - int ret = 0; - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - name_data.frame = frame; - name_data.participants = participants; - name_data.failed_on = alloca0(ec->nodes);; - - for (i = 0; i < ec->nodes; i++) { - if (!participants[i]) - continue; - ret = syncop_dir_scan (ec->xl_list[i], &loc, - GF_CLIENT_PID_SELF_HEALD, &name_data, - ec_name_heal_handler); - if (ret < 0) { - break; - } - for (j = 0; j < ec->nodes; j++) - if (name_data.failed_on[j]) - participants[j] = 0; + int i = 0; + int j = 0; + loc_t loc = {0}; + struct ec_name_data name_data = {0}; + int ret = 0; + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + name_data.frame = frame; + name_data.participants = participants; + name_data.failed_on = alloca0(ec->nodes); + ; + + for (i = 0; i < ec->nodes; i++) { + if (!participants[i]) + continue; + ret = syncop_dir_scan(ec->xl_list[i], &loc, GF_CLIENT_PID_SELF_HEALD, + &name_data, ec_name_heal_handler); + if (ret < 0) { + break; + } + for (j = 0; j < ec->nodes; j++) + if (name_data.failed_on[j]) + participants[j] = 0; - if (EC_COUNT (participants, ec->nodes) <= ec->fragments) { - ret = -ENOTCONN; - break; - } + if (EC_COUNT(participants, ec->nodes) <= ec->fragments) { + ret = -ENOTCONN; + break; } - loc_wipe (&loc); - return ret; + } + loc_wipe(&loc); + return ret; } int -__ec_heal_entry (call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *heal_on, unsigned char *sources, - unsigned char *healed_sinks) -{ - unsigned char *locked_on = NULL; - unsigned char *output = NULL; - uint64_t *versions = NULL; - uint64_t *dirty = NULL; - unsigned char *participants = NULL; - default_args_cbk_t *replies = NULL; - int ret = 0; - int source = 0; - int i = 0; - - locked_on = alloca0(ec->nodes); - output = alloca0(ec->nodes); - versions = alloca0 (ec->nodes * sizeof (*versions)); - dirty = alloca0 (ec->nodes * sizeof (*dirty)); - - EC_REPLIES_ALLOC (replies, ec->nodes); - ret = cluster_inodelk (ec->xl_list, heal_on, ec->nodes, replies, - locked_on, frame, ec->xl, ec->xl->name, inode, - 0, 0); - { - if (ret <= ec->fragments) { - gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal " - "as only %d number of subvolumes could " - "be locked", uuid_utoa (inode->gfid), ret); - ret = -ENOTCONN; - goto unlock; - } - ret = __ec_heal_entry_prepare (frame, ec, inode, locked_on, - versions, dirty, sources, - healed_sinks); - source = ret; - } +__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *heal_on, unsigned char *sources, + unsigned char *healed_sinks) +{ + unsigned char *locked_on = NULL; + unsigned char 
*output = NULL; + uint64_t *versions = NULL; + uint64_t *dirty = NULL; + unsigned char *participants = NULL; + default_args_cbk_t *replies = NULL; + int ret = 0; + int source = 0; + int i = 0; + + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + versions = alloca0(ec->nodes * sizeof(*versions)); + dirty = alloca0(ec->nodes * sizeof(*dirty)); + + EC_REPLIES_ALLOC(replies, ec->nodes); + ret = cluster_inodelk(ec->xl_list, heal_on, ec->nodes, replies, locked_on, + frame, ec->xl, ec->xl->name, inode, 0, 0); + { + if (ret <= ec->fragments) { + gf_msg_debug(ec->xl->name, 0, + "%s: Skipping heal " + "as only %d number of subvolumes could " + "be locked", + uuid_utoa(inode->gfid), ret); + ret = -ENOTCONN; + goto unlock; + } + ret = __ec_heal_entry_prepare(frame, ec, inode, locked_on, versions, + dirty, sources, healed_sinks); + source = ret; + } unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, - frame, ec->xl, ec->xl->name, inode, 0, 0); - if (ret < 0) - goto out; + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, + ec->xl, ec->xl->name, inode, 0, 0); + if (ret < 0) + goto out; - participants = alloca0 (ec->nodes); - for (i = 0; i < ec->nodes; i++) { - if (sources[i] || healed_sinks[i]) - participants[i] = 1; - } - ret = ec_heal_names (frame, ec, inode, participants); + participants = alloca0(ec->nodes); + for (i = 0; i < ec->nodes; i++) { + if (sources[i] || healed_sinks[i]) + participants[i] = 1; + } + ret = ec_heal_names(frame, ec, inode, participants); - if (EC_COUNT (participants, ec->nodes) <= ec->fragments) - goto out; + if (EC_COUNT(participants, ec->nodes) <= ec->fragments) + goto out; - for (i = 0; i < ec->nodes; i++) { - if (!participants[i]) { - sources[i] = 0; - healed_sinks[i] = 0; - } + for (i = 0; i < ec->nodes; i++) { + if (!participants[i]) { + sources[i] = 0; + healed_sinks[i] = 0; } + } - ec_adjust_versions (frame, ec, EC_DATA_TXN, inode, source, - sources, healed_sinks, versions, dirty); + ec_adjust_versions(frame, ec, EC_DATA_TXN, inode, source, sources, + healed_sinks, versions, dirty); out: - cluster_replies_wipe (replies, ec->nodes); - return ret; + cluster_replies_wipe(replies, ec->nodes); + return ret; } int -ec_heal_entry (call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *sources, unsigned char *healed_sinks) +ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *sources, unsigned char *healed_sinks) { - unsigned char *locked_on = NULL; - unsigned char *up_subvols = NULL; - unsigned char *output = NULL; - char selfheal_domain[1024] = {0}; - int ret = 0; - default_args_cbk_t *replies = NULL; - - EC_REPLIES_ALLOC (replies, ec->nodes); - locked_on = alloca0(ec->nodes); - output = alloca0(ec->nodes); - up_subvols = alloca0(ec->nodes); - - sprintf (selfheal_domain, "%s:self-heal", ec->xl->name); - ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); - /*If other processes are already doing the heal, don't block*/ - ret = cluster_tiebreaker_inodelk (ec->xl_list, up_subvols, ec->nodes, - replies, locked_on, frame, ec->xl, - selfheal_domain, inode, 0, 0); - { - if (ret <= ec->fragments) { - gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal " - "as only %d number of subvolumes could " - "be locked", uuid_utoa (inode->gfid), ret); - ret = -ENOTCONN; - goto unlock; - } - ret = __ec_heal_entry (frame, ec, inode, locked_on, - sources, healed_sinks); - } + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; + unsigned char *output = NULL; + char 
selfheal_domain[1024] = {0}; + int ret = 0; + default_args_cbk_t *replies = NULL; + + EC_REPLIES_ALLOC(replies, ec->nodes); + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + up_subvols = alloca0(ec->nodes); + + sprintf(selfheal_domain, "%s:self-heal", ec->xl->name); + ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes); + /*If other processes are already doing the heal, don't block*/ + ret = cluster_tiebreaker_inodelk(ec->xl_list, up_subvols, ec->nodes, + replies, locked_on, frame, ec->xl, + selfheal_domain, inode, 0, 0); + { + if (ret <= ec->fragments) { + gf_msg_debug(ec->xl->name, 0, + "%s: Skipping heal " + "as only %d number of subvolumes could " + "be locked", + uuid_utoa(inode->gfid), ret); + ret = -ENOTCONN; + goto unlock; + } + ret = __ec_heal_entry(frame, ec, inode, locked_on, sources, + healed_sinks); + } unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, - frame, ec->xl, selfheal_domain, inode, 0, 0); - cluster_replies_wipe (replies, ec->nodes); - return ret; + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, + ec->xl, selfheal_domain, inode, 0, 0); + cluster_replies_wipe(replies, ec->nodes); + return ret; } /*Find direction for data heal and heal info*/ int -ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies, - uint64_t *data_versions, - uint64_t *dirty, uint64_t *size, unsigned char *sources, - unsigned char *healed_sinks, - gf_boolean_t check_ondisksize, int which) -{ - uint64_t xattr[EC_VERSION_SIZE] = {0}; - char version_size[128] = {0}; - dict_t *version_size_db = NULL; - unsigned char *same = NULL; - int max_same_count = 0; - int source = 0; - int i = 0; - int ret = 0; - dict_t *dict = NULL; - uint64_t source_size = 0; - - version_size_db = dict_new (); - if (!version_size_db) { - ret = -ENOMEM; - goto out; +ec_heal_data_find_direction(ec_t *ec, default_args_cbk_t *replies, + uint64_t *data_versions, uint64_t *dirty, + uint64_t *size, unsigned char *sources, + unsigned char *healed_sinks, + gf_boolean_t check_ondisksize, int which) +{ + uint64_t xattr[EC_VERSION_SIZE] = {0}; + char version_size[128] = {0}; + dict_t *version_size_db = NULL; + unsigned char *same = NULL; + int max_same_count = 0; + int source = 0; + int i = 0; + int ret = 0; + dict_t *dict = NULL; + uint64_t source_size = 0; + + version_size_db = dict_new(); + if (!version_size_db) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < ec->nodes; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret < 0) + continue; + dict = (which == EC_COMBINE_XDATA) ? replies[i].xdata + : replies[i].xattr; + + ret = ec_dict_get_array(dict, EC_XATTR_VERSION, xattr, EC_VERSION_SIZE); + if (ret == 0) { + data_versions[i] = xattr[EC_DATA_TXN]; } - for (i = 0; i < ec->nodes; i++) { - if (!replies[i].valid) - continue; - if (replies[i].op_ret < 0) - continue; - dict = (which == EC_COMBINE_XDATA) ? 
replies[i].xdata : - replies[i].xattr; - - ret = ec_dict_get_array (dict, EC_XATTR_VERSION, - xattr, EC_VERSION_SIZE); - if (ret == 0) { - data_versions[i] = xattr[EC_DATA_TXN]; - } + memset(xattr, 0, sizeof(xattr)); + ret = ec_dict_get_array(dict, EC_XATTR_DIRTY, xattr, EC_VERSION_SIZE); + if (ret == 0) { + dirty[i] = xattr[EC_DATA_TXN]; + } + ret = ec_dict_del_number(dict, EC_XATTR_SIZE, &size[i]); + /*Build a db of same metadata and data version and size*/ + snprintf(version_size, sizeof(version_size), "%" PRIu64 "-%" PRIu64, + data_versions[i], size[i]); - memset (xattr, 0, sizeof (xattr)); - ret = ec_dict_get_array (dict, EC_XATTR_DIRTY, - xattr, EC_VERSION_SIZE); - if (ret == 0) { - dirty[i] = xattr[EC_DATA_TXN]; - } - ret = ec_dict_del_number (dict, EC_XATTR_SIZE, - &size[i]); - /*Build a db of same metadata and data version and size*/ - snprintf (version_size, sizeof (version_size), - "%"PRIu64"-%"PRIu64, data_versions[i], size[i]); - - ret = dict_get_bin (version_size_db, version_size, - (void **)&same); - if (ret < 0) { - same = alloca0 (ec->nodes); - } + ret = dict_get_bin(version_size_db, version_size, (void **)&same); + if (ret < 0) { + same = alloca0(ec->nodes); + } - same[i] = 1; - if (max_same_count < EC_COUNT (same, ec->nodes)) { - max_same_count = EC_COUNT (same, ec->nodes); - source = i; - } + same[i] = 1; + if (max_same_count < EC_COUNT(same, ec->nodes)) { + max_same_count = EC_COUNT(same, ec->nodes); + source = i; + } - if (ret < 0) { - ret = dict_set_static_bin (version_size_db, - version_size, same, ec->nodes); - } + if (ret < 0) { + ret = dict_set_static_bin(version_size_db, version_size, same, + ec->nodes); + } - if (ret < 0) { - ret = -ENOMEM; - goto out; - } + if (ret < 0) { + ret = -ENOMEM; + goto out; } - /* If we don't have ec->fragments number of same version,size it is not - * recoverable*/ - if (max_same_count < ec->fragments) { - ret = -EIO; - goto out; - } else { - snprintf (version_size, sizeof (version_size), - "%"PRIu64"-%"PRIu64, data_versions[source], - size[source]); - - ret = dict_get_bin (version_size_db, version_size, - (void **)&same); - if (ret < 0) - goto out; - memcpy (sources, same, ec->nodes); - for (i = 0; i < ec->nodes; i++) { - if (replies[i].valid && (replies[i].op_ret == 0) && - !sources[i]) - healed_sinks[i] = 1; - } + } + /* If we don't have ec->fragments number of same version,size it is not + * recoverable*/ + if (max_same_count < ec->fragments) { + ret = -EIO; + goto out; + } else { + snprintf(version_size, sizeof(version_size), "%" PRIu64 "-%" PRIu64, + data_versions[source], size[source]); + + ret = dict_get_bin(version_size_db, version_size, (void **)&same); + if (ret < 0) + goto out; + memcpy(sources, same, ec->nodes); + for (i = 0; i < ec->nodes; i++) { + if (replies[i].valid && (replies[i].op_ret == 0) && !sources[i]) + healed_sinks[i] = 1; } + } - /* There could be files with versions, size same but on disk ia_size - * could be different because of disk crashes, mark them as sinks as - * well*/ + /* There could be files with versions, size same but on disk ia_size + * could be different because of disk crashes, mark them as sinks as + * well*/ - if (check_ondisksize) { - source_size = size[source]; - ec_adjust_size_up (ec, &source_size, _gf_true); + if (check_ondisksize) { + source_size = size[source]; + ec_adjust_size_up(ec, &source_size, _gf_true); - for (i = 0; i < ec->nodes; i++) { - if (sources[i]) { - if (replies[i].stat.ia_size != source_size) { - sources[i] = 0; - healed_sinks[i] = 1; - max_same_count--; - } else { 
- source = i; - } - } - } - if (max_same_count < ec->fragments) { - ret = -EIO; - goto out; + for (i = 0; i < ec->nodes; i++) { + if (sources[i]) { + if (replies[i].stat.ia_size != source_size) { + sources[i] = 0; + healed_sinks[i] = 1; + max_same_count--; + } else { + source = i; } + } } + if (max_same_count < ec->fragments) { + ret = -EIO; + goto out; + } + } - ret = source; + ret = source; out: - if (version_size_db) - dict_unref (version_size_db); - return ret; + if (version_size_db) + dict_unref(version_size_db); + return ret; } int -__ec_heal_data_prepare (call_frame_t *frame, ec_t *ec, fd_t *fd, - unsigned char *locked_on, uint64_t *versions, - uint64_t *dirty, uint64_t *size, unsigned char *sources, - unsigned char *healed_sinks, unsigned char *trim, - struct iatt *stbuf) -{ - default_args_cbk_t *replies = NULL; - default_args_cbk_t *fstat_replies = NULL; - unsigned char *output = NULL; - unsigned char *fstat_output = NULL; - dict_t *xattrs = NULL; - uint64_t zero_array[2] = {0}; - int source = 0; - int ret = 0; - uint64_t zero_value = 0; - int i = 0; - - EC_REPLIES_ALLOC (replies, ec->nodes); - EC_REPLIES_ALLOC (fstat_replies, ec->nodes); - output = alloca0(ec->nodes); - fstat_output = alloca0(ec->nodes); - xattrs = dict_new (); - if (!xattrs || - dict_set_static_bin (xattrs, EC_XATTR_VERSION, zero_array, - sizeof (zero_array)) || - dict_set_static_bin (xattrs, EC_XATTR_DIRTY, zero_array, - sizeof (zero_array)) || - dict_set_static_bin (xattrs, EC_XATTR_SIZE, &zero_value, - sizeof (zero_value))) { - ret = -ENOMEM; - goto out; - } +__ec_heal_data_prepare(call_frame_t *frame, ec_t *ec, fd_t *fd, + unsigned char *locked_on, uint64_t *versions, + uint64_t *dirty, uint64_t *size, unsigned char *sources, + unsigned char *healed_sinks, unsigned char *trim, + struct iatt *stbuf) +{ + default_args_cbk_t *replies = NULL; + default_args_cbk_t *fstat_replies = NULL; + unsigned char *output = NULL; + unsigned char *fstat_output = NULL; + dict_t *xattrs = NULL; + uint64_t zero_array[2] = {0}; + int source = 0; + int ret = 0; + uint64_t zero_value = 0; + int i = 0; + + EC_REPLIES_ALLOC(replies, ec->nodes); + EC_REPLIES_ALLOC(fstat_replies, ec->nodes); + output = alloca0(ec->nodes); + fstat_output = alloca0(ec->nodes); + xattrs = dict_new(); + if (!xattrs || + dict_set_static_bin(xattrs, EC_XATTR_VERSION, zero_array, + sizeof(zero_array)) || + dict_set_static_bin(xattrs, EC_XATTR_DIRTY, zero_array, + sizeof(zero_array)) || + dict_set_static_bin(xattrs, EC_XATTR_SIZE, &zero_value, + sizeof(zero_value))) { + ret = -ENOMEM; + goto out; + } - ret = cluster_fxattrop (ec->xl_list, locked_on, ec->nodes, - replies, output, frame, ec->xl, fd, - GF_XATTROP_ADD_ARRAY64, xattrs, NULL); + ret = cluster_fxattrop(ec->xl_list, locked_on, ec->nodes, replies, output, + frame, ec->xl, fd, GF_XATTROP_ADD_ARRAY64, xattrs, + NULL); - ret = cluster_fstat (ec->xl_list, locked_on, ec->nodes, fstat_replies, - fstat_output, frame, ec->xl, fd, NULL); + ret = cluster_fstat(ec->xl_list, locked_on, ec->nodes, fstat_replies, + fstat_output, frame, ec->xl, fd, NULL); - for (i = 0; i < ec->nodes; i++) { - output[i] = output[i] && fstat_output[i]; - replies[i].valid = output[i]; - if (output[i]) - replies[i].stat = fstat_replies[i].stat; - } + for (i = 0; i < ec->nodes; i++) { + output[i] = output[i] && fstat_output[i]; + replies[i].valid = output[i]; + if (output[i]) + replies[i].stat = fstat_replies[i].stat; + } - if (EC_COUNT (output, ec->nodes) <= ec->fragments) { - ret = -ENOTCONN; - goto out; - } + if (EC_COUNT(output, 
ec->nodes) <= ec->fragments) { + ret = -ENOTCONN; + goto out; + } - source = ec_heal_data_find_direction (ec, replies, versions, - dirty, size, sources, - healed_sinks, _gf_true, - EC_COMBINE_DICT); - ret = source; - if (ret < 0) - goto out; + source = ec_heal_data_find_direction(ec, replies, versions, dirty, size, + sources, healed_sinks, _gf_true, + EC_COMBINE_DICT); + ret = source; + if (ret < 0) + goto out; - if (stbuf) - *stbuf = replies[source].stat; + if (stbuf) + *stbuf = replies[source].stat; - for (i = 0; i < ec->nodes; i++) { - if (healed_sinks[i]) { - if (replies[i].stat.ia_size) - trim[i] = 1; - } + for (i = 0; i < ec->nodes; i++) { + if (healed_sinks[i]) { + if (replies[i].stat.ia_size) + trim[i] = 1; } + } - if (EC_COUNT(sources, ec->nodes) < ec->fragments) { - ret = -ENOTCONN; - goto out; - } + if (EC_COUNT(sources, ec->nodes) < ec->fragments) { + ret = -ENOTCONN; + goto out; + } - ret = source; + ret = source; out: - if (xattrs) - dict_unref (xattrs); - cluster_replies_wipe (replies, ec->nodes); - cluster_replies_wipe (fstat_replies, ec->nodes); - if (ret < 0) { - gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s", - uuid_utoa (fd->inode->gfid), strerror (-ret)); - } else { - gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: " - "%d", uuid_utoa (fd->inode->gfid), - EC_COUNT (sources, ec->nodes), - EC_COUNT (healed_sinks, ec->nodes)); - } - return ret; + if (xattrs) + dict_unref(xattrs); + cluster_replies_wipe(replies, ec->nodes); + cluster_replies_wipe(fstat_replies, ec->nodes); + if (ret < 0) { + gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s", + uuid_utoa(fd->inode->gfid), strerror(-ret)); + } else { + gf_msg_debug(ec->xl->name, 0, + "%s: sources: %d, sinks: " + "%d", + uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes), + EC_COUNT(healed_sinks, ec->nodes)); + } + return ret; } int -__ec_heal_mark_sinks (call_frame_t *frame, ec_t *ec, fd_t *fd, - uint64_t *versions, unsigned char *healed_sinks) -{ - int i = 0; - int ret = 0; - unsigned char *mark = NULL; - dict_t *xattrs = NULL; - default_args_cbk_t *replies = NULL; - unsigned char *output = NULL; - uint64_t versions_xattr[2] = {0}; - - EC_REPLIES_ALLOC (replies, ec->nodes); - xattrs = dict_new (); - if (!xattrs) { - ret = -ENOMEM; - goto out; - } +__ec_heal_mark_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd, + uint64_t *versions, unsigned char *healed_sinks) +{ + int i = 0; + int ret = 0; + unsigned char *mark = NULL; + dict_t *xattrs = NULL; + default_args_cbk_t *replies = NULL; + unsigned char *output = NULL; + uint64_t versions_xattr[2] = {0}; + + EC_REPLIES_ALLOC(replies, ec->nodes); + xattrs = dict_new(); + if (!xattrs) { + ret = -ENOMEM; + goto out; + } - mark = alloca0 (ec->nodes); - for (i = 0; i < ec->nodes; i++) { - if (!healed_sinks[i]) - continue; - if ((versions[i] >> EC_SELFHEAL_BIT) & 1) - continue; - mark[i] = 1; - } + mark = alloca0(ec->nodes); + for (i = 0; i < ec->nodes; i++) { + if (!healed_sinks[i]) + continue; + if ((versions[i] >> EC_SELFHEAL_BIT) & 1) + continue; + mark[i] = 1; + } - if (EC_COUNT (mark, ec->nodes) == 0) - return 0; + if (EC_COUNT(mark, ec->nodes) == 0) + return 0; - versions_xattr[EC_DATA_TXN] = hton64(1ULL<<EC_SELFHEAL_BIT); - ret = dict_set_static_bin (xattrs, EC_XATTR_VERSION, versions_xattr, - sizeof (versions_xattr)); - if (ret) { - ret = -ENOMEM; - goto out; - } - - output = alloca0 (ec->nodes); - ret = cluster_fxattrop (ec->xl_list, mark, ec->nodes, - replies, output, frame, ec->xl, fd, - GF_XATTROP_ADD_ARRAY64, xattrs, NULL); - for (i = 0; i < ec->nodes; i++) { - if (!output[i]) { - if (mark[i]) - healed_sinks[i] = 0; - continue; - } - versions[i] |= (1ULL<<EC_SELFHEAL_BIT); + versions_xattr[EC_DATA_TXN] = hton64(1ULL << EC_SELFHEAL_BIT); + ret = dict_set_static_bin(xattrs, EC_XATTR_VERSION, versions_xattr, + sizeof(versions_xattr)); + if (ret) { + ret = -ENOMEM; + goto out; + } + + output = alloca0(ec->nodes); + ret = cluster_fxattrop(ec->xl_list, mark, ec->nodes, replies, output, frame,
+ ec->xl, fd, GF_XATTROP_ADD_ARRAY64, xattrs, NULL); + for (i = 0; i < ec->nodes; i++) { + if (!output[i]) { + if (mark[i]) + healed_sinks[i] = 0; + continue; } + versions[i] |= (1ULL << EC_SELFHEAL_BIT); + } - if (EC_COUNT (healed_sinks, ec->nodes) == 0) { - ret = -ENOTCONN; - goto out; - } - ret = 0; + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = -ENOTCONN; + goto out; + } + ret = 0; out: - cluster_replies_wipe (replies, ec->nodes); - if (xattrs) - dict_unref (xattrs); - if (ret < 0) - gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s", - uuid_utoa (fd->inode->gfid), strerror (-ret)); - return ret; + cluster_replies_wipe(replies, ec->nodes); + if (xattrs) + dict_unref(xattrs); + if (ret < 0) + gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s", + uuid_utoa(fd->inode->gfid), strerror(-ret)); + return ret; } int32_t -ec_manager_heal_block (ec_fop_data_t *fop, int32_t state) +ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) { ec_heal_t *heal = fop->data; heal->fop = fop; switch (state) { - case EC_STATE_INIT: - ec_owner_set(fop->frame, fop->frame->root); + case EC_STATE_INIT: + ec_owner_set(fop->frame, fop->frame->root); - ec_heal_inodelk(heal, F_WRLCK, 1, 0, 0); + ec_heal_inodelk(heal, F_WRLCK, 1, 0, 0); - return EC_STATE_HEAL_DATA_COPY; + return EC_STATE_HEAL_DATA_COPY; - case EC_STATE_HEAL_DATA_COPY: - gf_msg_debug (fop->xl->name, 0, "%s: read/write starting", - uuid_utoa (heal->fd->inode->gfid)); - ec_heal_data_block (heal); + case EC_STATE_HEAL_DATA_COPY: + gf_msg_debug(fop->xl->name, 0, "%s: read/write starting", + uuid_utoa(heal->fd->inode->gfid)); + ec_heal_data_block(heal); - return EC_STATE_HEAL_DATA_UNLOCK; + return EC_STATE_HEAL_DATA_UNLOCK; - case -EC_STATE_HEAL_DATA_COPY: - case -EC_STATE_HEAL_DATA_UNLOCK: - case EC_STATE_HEAL_DATA_UNLOCK: - ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0); + case -EC_STATE_HEAL_DATA_COPY: + case -EC_STATE_HEAL_DATA_UNLOCK: + case EC_STATE_HEAL_DATA_UNLOCK: + ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0); - return EC_STATE_REPORT; + return EC_STATE_REPORT; - case EC_STATE_REPORT: - if (fop->cbks.heal) { - fop->cbks.heal (fop->req_frame, fop, fop->xl, 0, - 0, (heal->good | heal->bad), - heal->good, heal->bad, NULL); - } + case EC_STATE_REPORT: + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0, + (heal->good | heal->bad), heal->good, heal->bad, + NULL); + } - return EC_STATE_END; - case -EC_STATE_REPORT: - if (fop->cbks.heal) { - fop->cbks.heal (fop->req_frame, fop, fop->xl, -1, - fop->error, 0, 0, 0, NULL); - } + return EC_STATE_END; + case -EC_STATE_REPORT: + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0, + 0, 0, NULL); + } - return EC_STATE_END; - default: - gf_msg (fop->xl->name, GF_LOG_ERROR, 0, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + return EC_STATE_END; + default: + gf_msg(fop->xl->name, GF_LOG_ERROR, 0, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); - return EC_STATE_END; + return EC_STATE_END; } } /*Takes lock */ void -ec_heal_block (call_frame_t *frame, xlator_t *this, uintptr_t target, +ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal) { - ec_cbk_t callback = { .heal = func }; + ec_cbk_t callback = {.heal = func}; ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; @@ -1950,9 +1944,8 @@ ec_heal_block (call_frame_t *frame, xlator_t *this, uintptr_t target, VALIDATE_OR_GOTO(this, out); 
GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate (frame, this, EC_FOP_HEAL, 0, target, minimum, - NULL, ec_manager_heal_block, callback, - heal); + fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum, + NULL, ec_manager_heal_block, callback, heal); if (fop == NULL) goto out; @@ -1967,761 +1960,751 @@ out: } int32_t -ec_heal_block_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, uintptr_t mask, - uintptr_t good, uintptr_t bad, dict_t *xdata) +ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, + uintptr_t good, uintptr_t bad, dict_t *xdata) { - ec_fop_data_t *fop = cookie; - ec_heal_t *heal = fop->data; + ec_fop_data_t *fop = cookie; + ec_heal_t *heal = fop->data; - fop->heal = NULL; - heal->fop = NULL; - heal->error = op_ret < 0 ? op_errno : 0; - syncbarrier_wake (heal->data); - return 0; + fop->heal = NULL; + heal->fop = NULL; + heal->error = op_ret < 0 ? op_errno : 0; + syncbarrier_wake(heal->data); + return 0; } int -ec_sync_heal_block (call_frame_t *frame, xlator_t *this, ec_heal_t *heal) +ec_sync_heal_block(call_frame_t *frame, xlator_t *this, ec_heal_t *heal) { - ec_heal_block (frame, this, heal->bad|heal->good, EC_MINIMUM_ONE, - ec_heal_block_done, heal); - syncbarrier_wait (heal->data, 1); - if (heal->error != 0) { - return -heal->error; - } - if (heal->bad == 0) - return -ENOTCONN; - return 0; + ec_heal_block(frame, this, heal->bad | heal->good, EC_MINIMUM_ONE, + ec_heal_block_done, heal); + syncbarrier_wait(heal->data, 1); + if (heal->error != 0) { + return -heal->error; + } + if (heal->bad == 0) + return -ENOTCONN; + return 0; } int -ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size, - unsigned char *sources, unsigned char *healed_sinks) +ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size, + unsigned char *sources, unsigned char *healed_sinks) { - ec_heal_t *heal = NULL; - int ret = 0; - syncbarrier_t barrier; - - if (syncbarrier_init (&barrier)) - return -ENOMEM; - - heal = alloca0(sizeof (*heal)); - heal->fd = fd_ref (fd); - heal->xl = ec->xl; - heal->data = &barrier; - ec_adjust_size_up (ec, &size, _gf_false); - heal->total_size = size; - heal->size = (128 * GF_UNIT_KB * (ec->self_heal_window_size)); - /* We need to adjust the size to a multiple of the stripe size of the - * volume. Otherwise writes would need to fill gaps (head and/or tail) - * with existent data from the bad bricks. This could be garbage on a - * damaged file or it could fail if there aren't enough bricks. */ - heal->size -= heal->size % ec->stripe_size; - heal->bad = ec_char_array_to_mask (healed_sinks, ec->nodes); - heal->good = ec_char_array_to_mask (sources, ec->nodes); - heal->iatt.ia_type = IA_IFREG; - LOCK_INIT(&heal->lock); - - for (heal->offset = 0; (heal->offset < size) && !heal->done; - heal->offset += heal->size) { - /* We immediately abort any heal if a shutdown request has been - * received to avoid delays. The healing of this file will be - * restarted by another SHD or other client that accesses the - * file. 
*/ - if (ec->shutdown) { - gf_msg_debug(ec->xl->name, 0, "Cancelling heal because " - "EC is stopping."); - ret = -ENOTCONN; - break; - } - - gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: " - "%d, offset: %"PRIu64" bsize: %"PRIu64, - uuid_utoa (fd->inode->gfid), - EC_COUNT (sources, ec->nodes), - EC_COUNT (healed_sinks, ec->nodes), heal->offset, - heal->size); - ret = ec_sync_heal_block (frame, ec->xl, heal); - if (ret < 0) - break; - - } - memset (healed_sinks, 0, ec->nodes); - ec_mask_to_char_array (heal->bad, healed_sinks, ec->nodes); - fd_unref (heal->fd); - LOCK_DESTROY (&heal->lock); - syncbarrier_destroy (heal->data); + ec_heal_t *heal = NULL; + int ret = 0; + syncbarrier_t barrier; + + if (syncbarrier_init(&barrier)) + return -ENOMEM; + + heal = alloca0(sizeof(*heal)); + heal->fd = fd_ref(fd); + heal->xl = ec->xl; + heal->data = &barrier; + ec_adjust_size_up(ec, &size, _gf_false); + heal->total_size = size; + heal->size = (128 * GF_UNIT_KB * (ec->self_heal_window_size)); + /* We need to adjust the size to a multiple of the stripe size of the + * volume. Otherwise writes would need to fill gaps (head and/or tail) + * with existent data from the bad bricks. This could be garbage on a + * damaged file or it could fail if there aren't enough bricks. */ + heal->size -= heal->size % ec->stripe_size; + heal->bad = ec_char_array_to_mask(healed_sinks, ec->nodes); + heal->good = ec_char_array_to_mask(sources, ec->nodes); + heal->iatt.ia_type = IA_IFREG; + LOCK_INIT(&heal->lock); + + for (heal->offset = 0; (heal->offset < size) && !heal->done; + heal->offset += heal->size) { + /* We immediately abort any heal if a shutdown request has been + * received to avoid delays. The healing of this file will be + * restarted by another SHD or other client that accesses the + * file. 
*/ + if (ec->shutdown) { + gf_msg_debug(ec->xl->name, 0, + "Cancelling heal because " + "EC is stopping."); + ret = -ENOTCONN; + break; + } + + gf_msg_debug(ec->xl->name, 0, + "%s: sources: %d, sinks: " + "%d, offset: %" PRIu64 " bsize: %" PRIu64, + uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes), + EC_COUNT(healed_sinks, ec->nodes), heal->offset, + heal->size); + ret = ec_sync_heal_block(frame, ec->xl, heal); if (ret < 0) - gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s", - uuid_utoa (fd->inode->gfid), strerror (-ret)); - return ret; + break; + } + memset(healed_sinks, 0, ec->nodes); + ec_mask_to_char_array(heal->bad, healed_sinks, ec->nodes); + fd_unref(heal->fd); + LOCK_DESTROY(&heal->lock); + syncbarrier_destroy(heal->data); + if (ret < 0) + gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s", + uuid_utoa(fd->inode->gfid), strerror(-ret)); + return ret; } int -__ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec, - fd_t *fd, unsigned char *healed_sinks, - unsigned char *trim, uint64_t size) +__ec_heal_trim_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd, + unsigned char *healed_sinks, unsigned char *trim, + uint64_t size) { - default_args_cbk_t *replies = NULL; - unsigned char *output = NULL; - int ret = 0; - int i = 0; - off_t trim_offset = 0; + default_args_cbk_t *replies = NULL; + unsigned char *output = NULL; + int ret = 0; + int i = 0; + off_t trim_offset = 0; - EC_REPLIES_ALLOC (replies, ec->nodes); - output = alloca0 (ec->nodes); + EC_REPLIES_ALLOC(replies, ec->nodes); + output = alloca0(ec->nodes); - if (EC_COUNT (trim, ec->nodes) == 0) { - ret = 0; - goto out; - } - trim_offset = size; - ec_adjust_offset_up (ec, &trim_offset, _gf_true); - ret = cluster_ftruncate (ec->xl_list, trim, ec->nodes, replies, output, - frame, ec->xl, fd, trim_offset, NULL); - for (i = 0; i < ec->nodes; i++) { - if (!output[i] && trim[i]) - healed_sinks[i] = 0; - } + if (EC_COUNT(trim, ec->nodes) == 0) { + ret = 0; + goto out; + } + trim_offset = size; + ec_adjust_offset_up(ec, &trim_offset, _gf_true); + ret = cluster_ftruncate(ec->xl_list, trim, ec->nodes, replies, output, + frame, ec->xl, fd, trim_offset, NULL); + for (i = 0; i < ec->nodes; i++) { + if (!output[i] && trim[i]) + healed_sinks[i] = 0; + } - if (EC_COUNT (healed_sinks, ec->nodes) == 0) { - ret = -ENOTCONN; - goto out; - } + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = -ENOTCONN; + goto out; + } out: - cluster_replies_wipe (replies, ec->nodes); - if (ret < 0) - gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s", - uuid_utoa (fd->inode->gfid), strerror (-ret)); - return ret; + cluster_replies_wipe(replies, ec->nodes); + if (ret < 0) + gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s", + uuid_utoa(fd->inode->gfid), strerror(-ret)); + return ret; } int -ec_data_undo_pending (call_frame_t *frame, ec_t *ec, fd_t *fd, dict_t *xattr, - uint64_t *versions, uint64_t *dirty, uint64_t *size, - int source, gf_boolean_t erase_dirty, int idx) -{ - uint64_t versions_xattr[2] = {0}; - uint64_t dirty_xattr[2] = {0}; - uint64_t allzero[2] = {0}; - uint64_t size_xattr = 0; - int ret = 0; - - versions_xattr[EC_DATA_TXN] = hton64(versions[source] - versions[idx]); - ret = dict_set_static_bin (xattr, EC_XATTR_VERSION, - versions_xattr, - sizeof (versions_xattr)); - if (ret < 0) - goto out; - - size_xattr = hton64(size[source] - size[idx]); - ret = dict_set_static_bin (xattr, EC_XATTR_SIZE, - &size_xattr, sizeof (size_xattr)); - if (ret < 0) - goto out; - - if (erase_dirty) { - dirty_xattr[EC_DATA_TXN] = hton64(-dirty[idx]); - ret = 
dict_set_static_bin (xattr, EC_XATTR_DIRTY, - dirty_xattr, - sizeof (dirty_xattr)); - if (ret < 0) - goto out; - } - - if ((memcmp (versions_xattr, allzero, sizeof (allzero)) == 0) && - (memcmp (dirty_xattr, allzero, sizeof (allzero)) == 0) && - (size_xattr == 0)) { - ret = 0; - goto out; - } +ec_data_undo_pending(call_frame_t *frame, ec_t *ec, fd_t *fd, dict_t *xattr, + uint64_t *versions, uint64_t *dirty, uint64_t *size, + int source, gf_boolean_t erase_dirty, int idx) +{ + uint64_t versions_xattr[2] = {0}; + uint64_t dirty_xattr[2] = {0}; + uint64_t allzero[2] = {0}; + uint64_t size_xattr = 0; + int ret = 0; + + versions_xattr[EC_DATA_TXN] = hton64(versions[source] - versions[idx]); + ret = dict_set_static_bin(xattr, EC_XATTR_VERSION, versions_xattr, + sizeof(versions_xattr)); + if (ret < 0) + goto out; - ret = syncop_fxattrop (ec->xl_list[idx], fd, - GF_XATTROP_ADD_ARRAY64, xattr, NULL, NULL, NULL); -out: - return ret; -} + size_xattr = hton64(size[source] - size[idx]); + ret = dict_set_static_bin(xattr, EC_XATTR_SIZE, &size_xattr, + sizeof(size_xattr)); + if (ret < 0) + goto out; -int -__ec_fd_data_adjust_versions (call_frame_t *frame, ec_t *ec, fd_t *fd, - unsigned char *sources, unsigned char *healed_sinks, - uint64_t *versions, uint64_t *dirty, uint64_t *size) -{ - dict_t *xattr = NULL; - int i = 0; - int ret = 0; - int op_ret = 0; - int source = -1; - gf_boolean_t erase_dirty = _gf_false; - - xattr = dict_new (); - if (!xattr) { - op_ret = -ENOMEM; - goto out; - } + if (erase_dirty) { + dirty_xattr[EC_DATA_TXN] = hton64(-dirty[idx]); + ret = dict_set_static_bin(xattr, EC_XATTR_DIRTY, dirty_xattr, + sizeof(dirty_xattr)); + if (ret < 0) + goto out; + } - /* dirty xattr represents if the file needs heal. Unless all the - * copies are healed, don't erase it */ - if (EC_COUNT (sources, ec->nodes) + - EC_COUNT (healed_sinks, ec->nodes) == ec->nodes) - erase_dirty = _gf_true; + if ((memcmp(versions_xattr, allzero, sizeof(allzero)) == 0) && + (memcmp(dirty_xattr, allzero, sizeof(allzero)) == 0) && + (size_xattr == 0)) { + ret = 0; + goto out; + } - for (i = 0; i < ec->nodes; i++) { - if (sources[i]) { - source = i; - break; - } - } + ret = syncop_fxattrop(ec->xl_list[idx], fd, GF_XATTROP_ADD_ARRAY64, xattr, + NULL, NULL, NULL); +out: + return ret; +} - if (source == -1) { - op_ret = -ENOTCONN; - goto out; - } +int +__ec_fd_data_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd, + unsigned char *sources, + unsigned char *healed_sinks, uint64_t *versions, + uint64_t *dirty, uint64_t *size) +{ + dict_t *xattr = NULL; + int i = 0; + int ret = 0; + int op_ret = 0; + int source = -1; + gf_boolean_t erase_dirty = _gf_false; + + xattr = dict_new(); + if (!xattr) { + op_ret = -ENOMEM; + goto out; + } - for (i = 0; i < ec->nodes; i++) { - if (healed_sinks[i]) { - ret = ec_data_undo_pending (frame, ec, fd, xattr, - versions, dirty, size, - source, erase_dirty, i); - if (ret < 0) - goto out; - } + /* dirty xattr represents if the file needs heal. 
Unless all the + * copies are healed, don't erase it */ + if (EC_COUNT(sources, ec->nodes) + EC_COUNT(healed_sinks, ec->nodes) == + ec->nodes) + erase_dirty = _gf_true; + for (i = 0; i < ec->nodes; i++) { + if (sources[i]) { + source = i; + break; } + } + + if (source == -1) { + op_ret = -ENOTCONN; + goto out; + } - if (!erase_dirty) + for (i = 0; i < ec->nodes; i++) { + if (healed_sinks[i]) { + ret = ec_data_undo_pending(frame, ec, fd, xattr, versions, dirty, + size, source, erase_dirty, i); + if (ret < 0) goto out; + } + } - for (i = 0; i < ec->nodes; i++) { - if (sources[i]) { - ret = ec_data_undo_pending (frame, ec, fd, xattr, - versions, dirty, size, - source, erase_dirty, i); - if (ret < 0) - continue; - } + if (!erase_dirty) + goto out; + for (i = 0; i < ec->nodes; i++) { + if (sources[i]) { + ret = ec_data_undo_pending(frame, ec, fd, xattr, versions, dirty, + size, source, erase_dirty, i); + if (ret < 0) + continue; } + } out: - if (xattr) - dict_unref (xattr); - return op_ret; + if (xattr) + dict_unref(xattr); + return op_ret; } int -ec_restore_time_and_adjust_versions (call_frame_t *frame, ec_t *ec, fd_t *fd, - unsigned char *sources, - unsigned char *healed_sinks, - uint64_t *versions, uint64_t *dirty, - uint64_t *size) -{ - unsigned char *locked_on = NULL; - unsigned char *participants = NULL; - unsigned char *output = NULL; - default_args_cbk_t *replies = NULL; - unsigned char *postsh_sources = NULL; - unsigned char *postsh_healed_sinks = NULL; - unsigned char *postsh_trim = NULL; - uint64_t *postsh_versions = NULL; - uint64_t *postsh_dirty = NULL; - uint64_t *postsh_size = NULL; - int ret = 0; - int i = 0; - struct iatt source_buf = {0}; - loc_t loc = {0}; - - locked_on = alloca0(ec->nodes); - output = alloca0(ec->nodes); - participants = alloca0(ec->nodes); - postsh_sources = alloca0(ec->nodes); - postsh_healed_sinks = alloca0(ec->nodes); - postsh_trim = alloca0(ec->nodes); - postsh_versions = alloca0(ec->nodes * sizeof (*postsh_versions)); - postsh_dirty = alloca0(ec->nodes * sizeof (*postsh_dirty)); - postsh_size = alloca0(ec->nodes * sizeof (*postsh_size)); - - for (i = 0; i < ec->nodes; i++) { - if (healed_sinks[i] || sources[i]) - participants[i] = 1; - } - - EC_REPLIES_ALLOC (replies, ec->nodes); - ret = cluster_inodelk (ec->xl_list, participants, ec->nodes, replies, - locked_on, frame, ec->xl, ec->xl->name, - fd->inode, 0, 0); - { - if (ret <= ec->fragments) { - gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal " - "as only %d number of subvolumes could " - "be locked", uuid_utoa (fd->inode->gfid), ret); - ret = -ENOTCONN; - goto unlock; - } +ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd, + unsigned char *sources, + unsigned char *healed_sinks, + uint64_t *versions, uint64_t *dirty, + uint64_t *size) +{ + unsigned char *locked_on = NULL; + unsigned char *participants = NULL; + unsigned char *output = NULL; + default_args_cbk_t *replies = NULL; + unsigned char *postsh_sources = NULL; + unsigned char *postsh_healed_sinks = NULL; + unsigned char *postsh_trim = NULL; + uint64_t *postsh_versions = NULL; + uint64_t *postsh_dirty = NULL; + uint64_t *postsh_size = NULL; + int ret = 0; + int i = 0; + struct iatt source_buf = {0}; + loc_t loc = {0}; + + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + participants = alloca0(ec->nodes); + postsh_sources = alloca0(ec->nodes); + postsh_healed_sinks = alloca0(ec->nodes); + postsh_trim = alloca0(ec->nodes); + postsh_versions = alloca0(ec->nodes * sizeof(*postsh_versions)); + postsh_dirty 
= alloca0(ec->nodes * sizeof(*postsh_dirty)); + postsh_size = alloca0(ec->nodes * sizeof(*postsh_size)); + + for (i = 0; i < ec->nodes; i++) { + if (healed_sinks[i] || sources[i]) + participants[i] = 1; + } - ret = __ec_heal_data_prepare (frame, ec, fd, locked_on, - postsh_versions, postsh_dirty, - postsh_size, postsh_sources, - postsh_healed_sinks, postsh_trim, - &source_buf); - if (ret < 0) - goto unlock; - - loc.inode = inode_ref (fd->inode); - gf_uuid_copy (loc.gfid, fd->inode->gfid); - ret = cluster_setattr (ec->xl_list, healed_sinks, ec->nodes, - replies, output, frame, ec->xl, &loc, - &source_buf, - GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, - NULL); - EC_INTERSECT (healed_sinks, healed_sinks, output, ec->nodes); - if (EC_COUNT (healed_sinks, ec->nodes) == 0) { - ret = -ENOTCONN; - goto unlock; - } - ret = __ec_fd_data_adjust_versions (frame, ec, fd, sources, - healed_sinks, versions, dirty, size); - } + EC_REPLIES_ALLOC(replies, ec->nodes); + ret = cluster_inodelk(ec->xl_list, participants, ec->nodes, replies, + locked_on, frame, ec->xl, ec->xl->name, fd->inode, 0, + 0); + { + if (ret <= ec->fragments) { + gf_msg_debug(ec->xl->name, 0, + "%s: Skipping heal " + "as only %d number of subvolumes could " + "be locked", + uuid_utoa(fd->inode->gfid), ret); + ret = -ENOTCONN; + goto unlock; + } + + ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, postsh_versions, + postsh_dirty, postsh_size, postsh_sources, + postsh_healed_sinks, postsh_trim, + &source_buf); + if (ret < 0) + goto unlock; + + loc.inode = inode_ref(fd->inode); + gf_uuid_copy(loc.gfid, fd->inode->gfid); + ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies, + output, frame, ec->xl, &loc, &source_buf, + GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL); + EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes); + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = -ENOTCONN; + goto unlock; + } + ret = __ec_fd_data_adjust_versions(frame, ec, fd, sources, healed_sinks, + versions, dirty, size); + } unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, - frame, ec->xl, ec->xl->name, fd->inode, 0, 0); - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&loc); - return ret; + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, + ec->xl, ec->xl->name, fd->inode, 0, 0); + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + return ret; } int -__ec_heal_data (call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on, - unsigned char *sources, unsigned char *healed_sinks) +__ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on, + unsigned char *sources, unsigned char *healed_sinks) { - unsigned char *locked_on = NULL; - unsigned char *output = NULL; - uint64_t *versions = NULL; - uint64_t *dirty = NULL; - uint64_t *size = NULL; - unsigned char *trim = NULL; - default_args_cbk_t *replies = NULL; - int ret = 0; - int source = 0; - - locked_on = alloca0(ec->nodes); - output = alloca0(ec->nodes); - trim = alloca0 (ec->nodes); - versions = alloca0 (ec->nodes * sizeof (*versions)); - dirty = alloca0 (ec->nodes * sizeof (*dirty)); - size = alloca0 (ec->nodes * sizeof (*size)); - - EC_REPLIES_ALLOC (replies, ec->nodes); - ret = cluster_inodelk (ec->xl_list, heal_on, ec->nodes, replies, - locked_on, frame, ec->xl, ec->xl->name, - fd->inode, 0, 0); - { - if (ret <= ec->fragments) { - gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal " - "as only %d number of subvolumes could " - "be locked", uuid_utoa (fd->inode->gfid), ret); - ret 
= -ENOTCONN; - goto unlock; - } - - ret = __ec_heal_data_prepare (frame, ec, fd, locked_on, - versions, dirty, size, sources, - healed_sinks, trim, NULL); - if (ret < 0) - goto unlock; - - if (EC_COUNT(healed_sinks, ec->nodes) == 0) { - ret = __ec_fd_data_adjust_versions (frame, ec, fd, - sources, - healed_sinks, versions, dirty, size); - goto unlock; - } + unsigned char *locked_on = NULL; + unsigned char *output = NULL; + uint64_t *versions = NULL; + uint64_t *dirty = NULL; + uint64_t *size = NULL; + unsigned char *trim = NULL; + default_args_cbk_t *replies = NULL; + int ret = 0; + int source = 0; + + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + trim = alloca0(ec->nodes); + versions = alloca0(ec->nodes * sizeof(*versions)); + dirty = alloca0(ec->nodes * sizeof(*dirty)); + size = alloca0(ec->nodes * sizeof(*size)); + + EC_REPLIES_ALLOC(replies, ec->nodes); + ret = cluster_inodelk(ec->xl_list, heal_on, ec->nodes, replies, locked_on, + frame, ec->xl, ec->xl->name, fd->inode, 0, 0); + { + if (ret <= ec->fragments) { + gf_msg_debug(ec->xl->name, 0, + "%s: Skipping heal " + "as only %d number of subvolumes could " + "be locked", + uuid_utoa(fd->inode->gfid), ret); + ret = -ENOTCONN; + goto unlock; + } - source = ret; - ret = __ec_heal_mark_sinks (frame, ec, fd, versions, - healed_sinks); - if (ret < 0) - goto unlock; + ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, versions, dirty, + size, sources, healed_sinks, trim, NULL); + if (ret < 0) + goto unlock; - ret = __ec_heal_trim_sinks (frame, ec, fd, healed_sinks, - trim, size[source]); + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = __ec_fd_data_adjust_versions( + frame, ec, fd, sources, healed_sinks, versions, dirty, size); + goto unlock; } -unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, - frame, ec->xl, ec->xl->name, fd->inode, 0, 0); + + source = ret; + ret = __ec_heal_mark_sinks(frame, ec, fd, versions, healed_sinks); if (ret < 0) - goto out; + goto unlock; - if (EC_COUNT(healed_sinks, ec->nodes) == 0) - goto out; + ret = __ec_heal_trim_sinks(frame, ec, fd, healed_sinks, trim, + size[source]); + } +unlock: + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, + ec->xl, ec->xl->name, fd->inode, 0, 0); + if (ret < 0) + goto out; - gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: " - "%d", uuid_utoa (fd->inode->gfid), - EC_COUNT (sources, ec->nodes), - EC_COUNT (healed_sinks, ec->nodes)); + if (EC_COUNT(healed_sinks, ec->nodes) == 0) + goto out; - ret = ec_rebuild_data (frame, ec, fd, size[source], sources, - healed_sinks); - if (ret < 0) - goto out; + gf_msg_debug(ec->xl->name, 0, + "%s: sources: %d, sinks: " + "%d", + uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes), + EC_COUNT(healed_sinks, ec->nodes)); + + ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks); + if (ret < 0) + goto out; - ret = ec_restore_time_and_adjust_versions (frame, ec, fd, sources, - healed_sinks, versions, - dirty, size); + ret = ec_restore_time_and_adjust_versions( + frame, ec, fd, sources, healed_sinks, versions, dirty, size); out: - cluster_replies_wipe (replies, ec->nodes); - return ret; + cluster_replies_wipe(replies, ec->nodes); + return ret; } int -ec_heal_data (call_frame_t *frame, ec_t *ec, gf_boolean_t block, inode_t *inode, - unsigned char *sources, unsigned char *healed_sinks) -{ - unsigned char *locked_on = NULL; - unsigned char *up_subvols = NULL; - unsigned char *output = NULL; - default_args_cbk_t *replies = NULL; - fd_t 
*fd = NULL; - loc_t loc = {0}; - char selfheal_domain[1024] = {0}; - int ret = 0; - - EC_REPLIES_ALLOC (replies, ec->nodes); - - locked_on = alloca0(ec->nodes); - output = alloca0(ec->nodes); - up_subvols = alloca0(ec->nodes); - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - fd = fd_create (inode, 0); - if (!fd) { - ret = -ENOMEM; - goto out; - } +ec_heal_data(call_frame_t *frame, ec_t *ec, gf_boolean_t block, inode_t *inode, + unsigned char *sources, unsigned char *healed_sinks) +{ + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; + unsigned char *output = NULL; + default_args_cbk_t *replies = NULL; + fd_t *fd = NULL; + loc_t loc = {0}; + char selfheal_domain[1024] = {0}; + int ret = 0; + + EC_REPLIES_ALLOC(replies, ec->nodes); + + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + up_subvols = alloca0(ec->nodes); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + fd = fd_create(inode, 0); + if (!fd) { + ret = -ENOMEM; + goto out; + } - ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); + ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes); - ret = cluster_open (ec->xl_list, up_subvols, ec->nodes, replies, output, - frame, ec->xl, &loc, O_RDWR|O_LARGEFILE, fd, NULL); - if (ret <= ec->fragments) { - ret = -ENOTCONN; - goto out; - } + ret = cluster_open(ec->xl_list, up_subvols, ec->nodes, replies, output, + frame, ec->xl, &loc, O_RDWR | O_LARGEFILE, fd, NULL); + if (ret <= ec->fragments) { + ret = -ENOTCONN; + goto out; + } - fd_bind (fd); - sprintf (selfheal_domain, "%s:self-heal", ec->xl->name); - /*If other processes are already doing the heal, don't block*/ - if (block) { - ret = cluster_inodelk (ec->xl_list, output, ec->nodes, replies, - locked_on, frame, ec->xl, - selfheal_domain, inode, 0, 0); - } else { - ret = cluster_tiebreaker_inodelk (ec->xl_list, output, - ec->nodes, replies, locked_on, - frame, ec->xl, - selfheal_domain, inode, 0, 0); - } - { - if (ret <= ec->fragments) { - gf_msg_debug (ec->xl->name, 0, "%s: Skipping heal " - "as only %d number of subvolumes could " - "be locked", uuid_utoa (inode->gfid), ret); - ret = -ENOTCONN; - goto unlock; - } - ret = __ec_heal_data (frame, ec, fd, locked_on, sources, - healed_sinks); - } + fd_bind(fd); + sprintf(selfheal_domain, "%s:self-heal", ec->xl->name); + /*If other processes are already doing the heal, don't block*/ + if (block) { + ret = cluster_inodelk(ec->xl_list, output, ec->nodes, replies, + locked_on, frame, ec->xl, selfheal_domain, inode, + 0, 0); + } else { + ret = cluster_tiebreaker_inodelk(ec->xl_list, output, ec->nodes, + replies, locked_on, frame, ec->xl, + selfheal_domain, inode, 0, 0); + } + { + if (ret <= ec->fragments) { + gf_msg_debug(ec->xl->name, 0, + "%s: Skipping heal " + "as only %d number of subvolumes could " + "be locked", + uuid_utoa(inode->gfid), ret); + ret = -ENOTCONN; + goto unlock; + } + ret = __ec_heal_data(frame, ec, fd, locked_on, sources, healed_sinks); + } unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, - frame, ec->xl, selfheal_domain, inode, 0, 0); + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, + ec->xl, selfheal_domain, inode, 0, 0); out: - if (fd) - fd_unref (fd); - loc_wipe (&loc); - cluster_replies_wipe (replies, ec->nodes); - return ret; + if (fd) + fd_unref(fd); + loc_wipe(&loc); + cluster_replies_wipe(replies, ec->nodes); + return ret; } void -ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial) -{ - 
call_frame_t *frame = NULL; - unsigned char *participants = NULL; - unsigned char *msources = NULL; - unsigned char *mhealed_sinks = NULL; - unsigned char *sources = NULL; - unsigned char *healed_sinks = NULL; - ec_t *ec = NULL; - int ret = 0; - int op_ret = 0; - int op_errno = 0; - intptr_t mgood = 0; - intptr_t mbad = 0; - intptr_t good = 0; - intptr_t bad = 0; - ec_fop_data_t *fop = data; - gf_boolean_t blocking = _gf_false; - ec_heal_need_t need_heal = EC_HEAL_NONEED; - unsigned char *up_subvols = NULL; - char up_bricks[32]; - - ec = this->private; - - /* If it is heal request from getxattr, complete the heal and then - * unwind, if it is ec_heal with NULL as frame then no need to block - * the heal as the caller doesn't care about its completion. In case - * of heald whichever gets tiebreaking inodelk will take care of the - * heal, so no need to block*/ - if (fop->req_frame && !ec->shd.iamshd) - blocking = _gf_true; - - frame = create_frame (this, this->ctx->pool); - if (!frame) - goto out; +ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) +{ + call_frame_t *frame = NULL; + unsigned char *participants = NULL; + unsigned char *msources = NULL; + unsigned char *mhealed_sinks = NULL; + unsigned char *sources = NULL; + unsigned char *healed_sinks = NULL; + ec_t *ec = NULL; + int ret = 0; + int op_ret = 0; + int op_errno = 0; + intptr_t mgood = 0; + intptr_t mbad = 0; + intptr_t good = 0; + intptr_t bad = 0; + ec_fop_data_t *fop = data; + gf_boolean_t blocking = _gf_false; + ec_heal_need_t need_heal = EC_HEAL_NONEED; + unsigned char *up_subvols = NULL; + char up_bricks[32]; + + ec = this->private; + + /* If it is heal request from getxattr, complete the heal and then + * unwind, if it is ec_heal with NULL as frame then no need to block + * the heal as the caller doesn't care about its completion. In case + * of heald whichever gets tiebreaking inodelk will take care of the + * heal, so no need to block*/ + if (fop->req_frame && !ec->shd.iamshd) + blocking = _gf_true; + + frame = create_frame(this, this->ctx->pool); + if (!frame) + goto out; - ec_owner_set(frame, frame->root); - /*Do heal as root*/ - frame->root->uid = 0; - frame->root->gid = 0; - /*Mark the fops as internal*/ - frame->root->pid = GF_CLIENT_PID_SELF_HEALD; - participants = alloca0(ec->nodes); - ec_mask_to_char_array (ec->xl_up, participants, ec->nodes); - - up_subvols = alloca0(ec->nodes); - ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); - - if (loc->name && strlen (loc->name)) { - ret = ec_heal_name (frame, ec, loc->parent, (char *)loc->name, - participants); - if (ret == 0) { - gf_msg_debug (this->name, 0, "%s: name heal " - "successful on %lX", loc->path, - ec_char_array_to_mask (participants, - ec->nodes)); - } else { - gf_msg_debug (this->name, 0, "%s: name heal " - "failed. 
ret = %d, subvolumes up = %s", - loc->path, ret, - ec_bin(up_bricks, sizeof(up_bricks), ec->xl_up, - ec->nodes)); - } + ec_owner_set(frame, frame->root); + /*Do heal as root*/ + frame->root->uid = 0; + frame->root->gid = 0; + /*Mark the fops as internal*/ + frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + participants = alloca0(ec->nodes); + ec_mask_to_char_array(ec->xl_up, participants, ec->nodes); + + up_subvols = alloca0(ec->nodes); + ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes); + + if (loc->name && strlen(loc->name)) { + ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name, + participants); + if (ret == 0) { + gf_msg_debug(this->name, 0, + "%s: name heal " + "successful on %lX", + loc->path, + ec_char_array_to_mask(participants, ec->nodes)); + } else { + gf_msg_debug( + this->name, 0, + "%s: name heal " + "failed. ret = %d, subvolumes up = %s", + loc->path, ret, + ec_bin(up_bricks, sizeof(up_bricks), ec->xl_up, ec->nodes)); } + } - /* Mount triggers heal only when it detects that it must need heal, shd - * triggers heals periodically which need not be thorough*/ - ec_heal_inspect (frame, ec, loc->inode, up_subvols, _gf_false, - !ec->shd.iamshd, &need_heal); + /* Mount triggers heal only when it detects that it must need heal, shd + * triggers heals periodically which need not be thorough*/ + ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, + !ec->shd.iamshd, &need_heal); - if (need_heal == EC_HEAL_NONEED) { - gf_msg (ec->xl->name, GF_LOG_DEBUG, 0, - EC_MSG_HEAL_FAIL, "Heal is not required for : %s ", - uuid_utoa(loc->gfid)); - goto out; - } + if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); + goto out; + } - msources = alloca0(ec->nodes); - mhealed_sinks = alloca0(ec->nodes); - ret = ec_heal_metadata (frame, ec, loc->inode, msources, mhealed_sinks); - if (ret == 0) { - mgood = ec_char_array_to_mask (msources, ec->nodes); - mbad = ec_char_array_to_mask (mhealed_sinks, ec->nodes); - } else { - op_ret = -1; - op_errno = -ret; - } - sources = alloca0(ec->nodes); - healed_sinks = alloca0(ec->nodes); - if (IA_ISREG (loc->inode->ia_type)) { - ret = ec_heal_data (frame, ec, blocking, loc->inode, sources, - healed_sinks); - } else if (IA_ISDIR (loc->inode->ia_type) && !partial) { - ret = ec_heal_entry (frame, ec, loc->inode, sources, - healed_sinks); - } else { - ret = 0; - memcpy (sources, participants, ec->nodes); - memcpy (healed_sinks, participants, ec->nodes); - } + msources = alloca0(ec->nodes); + mhealed_sinks = alloca0(ec->nodes); + ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks); + if (ret == 0) { + mgood = ec_char_array_to_mask(msources, ec->nodes); + mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes); + } else { + op_ret = -1; + op_errno = -ret; + } + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + if (IA_ISREG(loc->inode->ia_type)) { + ret = ec_heal_data(frame, ec, blocking, loc->inode, sources, + healed_sinks); + } else if (IA_ISDIR(loc->inode->ia_type) && !partial) { + ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks); + } else { + ret = 0; + memcpy(sources, participants, ec->nodes); + memcpy(healed_sinks, participants, ec->nodes); + } - if (ret == 0) { - good = ec_char_array_to_mask (sources, ec->nodes); - bad = ec_char_array_to_mask (healed_sinks, ec->nodes); - } else { - op_ret = -1; - op_errno = -ret; - } + if (ret == 0) { + good = ec_char_array_to_mask(sources, ec->nodes); + bad = 
ec_char_array_to_mask(healed_sinks, ec->nodes); + } else { + op_ret = -1; + op_errno = -ret; + } out: - if (fop->cbks.heal) { - fop->cbks.heal (fop->req_frame, fop, fop->xl, op_ret, - op_errno, ec_char_array_to_mask (participants, - ec->nodes), - mgood & good, mbad & bad, NULL); - } - if (frame) - STACK_DESTROY (frame->root); - return; + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), + mgood & good, mbad & bad, NULL); + } + if (frame) + STACK_DESTROY(frame->root); + return; } int -ec_synctask_heal_wrap (void *opaque) +ec_synctask_heal_wrap(void *opaque) { - ec_fop_data_t *fop = opaque; - ec_heal_do (fop->xl, fop, &fop->loc[0], fop->int32); - return 0; + ec_fop_data_t *fop = opaque; + ec_heal_do(fop->xl, fop, &fop->loc[0], fop->int32); + return 0; } int -ec_heal_done (int ret, call_frame_t *heal, void *opaque) +ec_heal_done(int ret, call_frame_t *heal, void *opaque) { - if (opaque) - ec_fop_data_release (opaque); - return 0; + if (opaque) + ec_fop_data_release(opaque); + return 0; } -ec_fop_data_t* -__ec_dequeue_heals (ec_t *ec) +ec_fop_data_t * +__ec_dequeue_heals(ec_t *ec) { - ec_fop_data_t *fop = NULL; + ec_fop_data_t *fop = NULL; - if (list_empty (&ec->heal_waiting)) - goto none; + if (list_empty(&ec->heal_waiting)) + goto none; - if ((ec->background_heals > 0) && (ec->healers >= ec->background_heals)) - goto none; + if ((ec->background_heals > 0) && (ec->healers >= ec->background_heals)) + goto none; - fop = list_entry(ec->heal_waiting.next, ec_fop_data_t, healer); - ec->heal_waiters--; - list_del_init(&fop->healer); - list_add(&fop->healer, &ec->healing); - ec->healers++; - return fop; + fop = list_entry(ec->heal_waiting.next, ec_fop_data_t, healer); + ec->heal_waiters--; + list_del_init(&fop->healer); + list_add(&fop->healer, &ec->healing); + ec->healers++; + return fop; none: - gf_msg_debug (ec->xl->name, 0, "Num healers: %d, Num Waiters: %d", - ec->healers, ec->heal_waiters); - return NULL; + gf_msg_debug(ec->xl->name, 0, "Num healers: %d, Num Waiters: %d", + ec->healers, ec->heal_waiters); + return NULL; } void -ec_heal_fail (ec_t *ec, ec_fop_data_t *fop) +ec_heal_fail(ec_t *ec, ec_fop_data_t *fop) { - if (fop->cbks.heal) { - fop->cbks.heal (fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, - 0, NULL); - } - ec_fop_data_release (fop); + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0, + NULL); + } + ec_fop_data_release(fop); } void -ec_launch_heal (ec_t *ec, ec_fop_data_t *fop) +ec_launch_heal(ec_t *ec, ec_fop_data_t *fop) { - int ret = 0; + int ret = 0; - ret = synctask_new (ec->xl->ctx->env, ec_synctask_heal_wrap, - ec_heal_done, NULL, fop); - if (ret < 0) { - ec_fop_set_error(fop, ENOMEM); - ec_heal_fail (ec, fop); - } + ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done, + NULL, fop); + if (ret < 0) { + ec_fop_set_error(fop, ENOMEM); + ec_heal_fail(ec, fop); + } } void -ec_handle_healers_done (ec_fop_data_t *fop) +ec_handle_healers_done(ec_fop_data_t *fop) { - ec_t *ec = fop->xl->private; - ec_fop_data_t *heal_fop = NULL; + ec_t *ec = fop->xl->private; + ec_fop_data_t *heal_fop = NULL; - if (list_empty (&fop->healer)) - return; + if (list_empty(&fop->healer)) + return; - LOCK (&ec->lock); + LOCK(&ec->lock); - list_del_init (&fop->healer); + list_del_init(&fop->healer); - do { - ec->healers--; - heal_fop = __ec_dequeue_heals (ec); + do { + ec->healers--; + heal_fop = __ec_dequeue_heals(ec); - if ((heal_fop != NULL) && 
ec->shutdown) { - /* This will prevent ec_handle_healers_done() to be - * called recursively. That would be problematic if - * the queue is too big. */ - list_del_init(&heal_fop->healer); + if ((heal_fop != NULL) && ec->shutdown) { + /* This will prevent ec_handle_healers_done() to be + * called recursively. That would be problematic if + * the queue is too big. */ + list_del_init(&heal_fop->healer); - UNLOCK(&ec->lock); + UNLOCK(&ec->lock); - ec_fop_set_error(fop, ENOTCONN); - ec_heal_fail(ec, heal_fop); + ec_fop_set_error(fop, ENOTCONN); + ec_heal_fail(ec, heal_fop); - LOCK(&ec->lock); - } - } while ((heal_fop != NULL) && ec->shutdown); + LOCK(&ec->lock); + } + } while ((heal_fop != NULL) && ec->shutdown); - UNLOCK (&ec->lock); + UNLOCK(&ec->lock); - if (heal_fop) - ec_launch_heal (ec, heal_fop); + if (heal_fop) + ec_launch_heal(ec, heal_fop); } void -ec_heal_throttle (xlator_t *this, ec_fop_data_t *fop) -{ - gf_boolean_t can_heal = _gf_true; - ec_t *ec = this->private; - - if (fop->req_frame == NULL) { - - LOCK (&ec->lock); - { - if ((ec->background_heals > 0) && - (ec->heal_wait_qlen + ec->background_heals) > - (ec->heal_waiters + ec->healers)) { - list_add_tail(&fop->healer, &ec->heal_waiting); - ec->heal_waiters++; - fop = __ec_dequeue_heals (ec); - } else { - can_heal = _gf_false; - } - } - UNLOCK (&ec->lock); - } +ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) +{ + gf_boolean_t can_heal = _gf_true; + ec_t *ec = this->private; - if (can_heal) { - if (fop) - ec_launch_heal (ec, fop); - } else { - gf_msg_debug (this->name, 0, "Max number of heals are " - "pending, background self-heal rejected"); - ec_fop_set_error(fop, EBUSY); - ec_heal_fail (ec, fop); - } + if (fop->req_frame == NULL) { + LOCK(&ec->lock); + { + if ((ec->background_heals > 0) && + (ec->heal_wait_qlen + ec->background_heals) > + (ec->heal_waiters + ec->healers)) { + list_add_tail(&fop->healer, &ec->heal_waiting); + ec->heal_waiters++; + fop = __ec_dequeue_heals(ec); + } else { + can_heal = _gf_false; + } + } + UNLOCK(&ec->lock); + } + + if (can_heal) { + if (fop) + ec_launch_heal(ec, fop); + } else { + gf_msg_debug(this->name, 0, + "Max number of heals are " + "pending, background self-heal rejected"); + ec_fop_set_error(fop, EBUSY); + ec_heal_fail(ec, fop); + } } void -ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_heal_cbk_t func, void *data, loc_t *loc, - int32_t partial, dict_t *xdata) +ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial, + dict_t *xdata) { - ec_cbk_t callback = { .heal = func }; + ec_cbk_t callback = {.heal = func}; ec_fop_data_t *fop = NULL; int32_t err = EINVAL; - gf_msg_trace ("ec", 0, "EC(HEAL) %p", frame); + gf_msg_trace("ec", 0, "EC(HEAL) %p", frame); VALIDATE_OR_GOTO(this, fail); GF_VALIDATE_OR_GOTO(this->name, this->private, fail); - if (!loc || !loc->inode || gf_uuid_is_null (loc->inode->gfid)) - goto fail; + if (!loc || !loc->inode || gf_uuid_is_null(loc->inode->gfid)) + goto fail; if (frame && frame->local) - goto fail; - fop = ec_fop_data_allocate (frame, this, EC_FOP_HEAL, 0, target, minimum, - NULL, NULL, callback, data); + goto fail; + fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum, + NULL, NULL, callback, data); err = ENOMEM; @@ -2738,454 +2721,446 @@ ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target, if (xdata) fop->xdata = dict_ref(xdata); - ec_heal_throttle (this, fop); + ec_heal_throttle(this, fop); 
return; fail: if (fop) - ec_fop_data_release (fop); + ec_fop_data_release(fop); if (func) - func (frame, NULL, this, -1, err, 0, 0, 0, NULL); + func(frame, NULL, this, -1, err, 0, 0, 0, NULL); } int -ec_replace_heal_done (int ret, call_frame_t *heal, void *opaque) +ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque) { - ec_t *ec = opaque; + ec_t *ec = opaque; - gf_msg_debug (ec->xl->name, 0, - "getxattr on bricks is done ret %d", ret); - return 0; + gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret); + return 0; } int32_t -ec_replace_heal (ec_t *ec, inode_t *inode) +ec_replace_heal(ec_t *ec, inode_t *inode) { - loc_t loc = {0}; - int ret = 0; + loc_t loc = {0}; + int ret = 0; - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - ret = syncop_getxattr (ec->xl, &loc, NULL, EC_XATTR_HEAL, - NULL, NULL); - if (ret < 0) - gf_msg_debug (ec->xl->name, 0, - "Heal failed for replace brick ret = %d", ret); - - /* Once the root inode has been checked, it might have triggered a - * self-heal on it after a replace brick command or for some other - * reason. It can also happen that the volume already had damaged - * files in the index, even if the heal on the root directory failed. - * In both cases we need to wake all index healers to continue - * healing remaining entries that are marked as dirty. */ - ec_shd_index_healer_wake(ec); - - loc_wipe (&loc); - return ret; + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + ret = syncop_getxattr(ec->xl, &loc, NULL, EC_XATTR_HEAL, NULL, NULL); + if (ret < 0) + gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d", + ret); + + /* Once the root inode has been checked, it might have triggered a + * self-heal on it after a replace brick command or for some other + * reason. It can also happen that the volume already had damaged + * files in the index, even if the heal on the root directory failed. + * In both cases we need to wake all index healers to continue + * healing remaining entries that are marked as dirty. 
*/ + ec_shd_index_healer_wake(ec); + + loc_wipe(&loc); + return ret; } int32_t -ec_replace_brick_heal_wrap (void *opaque) +ec_replace_brick_heal_wrap(void *opaque) { - ec_t *ec = opaque; - inode_table_t *itable = NULL; - int32_t ret = -1; + ec_t *ec = opaque; + inode_table_t *itable = NULL; + int32_t ret = -1; - if (ec->xl->itable) - itable = ec->xl->itable; - else - goto out; - ret = ec_replace_heal (ec, itable->root); + if (ec->xl->itable) + itable = ec->xl->itable; + else + goto out; + ret = ec_replace_heal(ec, itable->root); out: - return ret; + return ret; } int32_t -ec_launch_replace_heal (ec_t *ec) +ec_launch_replace_heal(ec_t *ec) { - int ret = -1; + int ret = -1; - if (!ec) - return ret; - ret = synctask_new (ec->xl->ctx->env, ec_replace_brick_heal_wrap, - ec_replace_heal_done, NULL, ec); - if (ret < 0) { - gf_msg_debug (ec->xl->name, 0, - "Heal failed for replace brick ret = %d", ret); - } + if (!ec) return ret; + ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap, + ec_replace_heal_done, NULL, ec); + if (ret < 0) { + gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d", + ret); + } + return ret; } int32_t ec_set_heal_info(dict_t **dict_rsp, char *status) { - dict_t *dict = NULL; - int ret = 0; + dict_t *dict = NULL; + int ret = 0; - dict = dict_new (); - if (!dict) { - ret = -ENOMEM; - goto out; - } - ret = dict_set_str (dict, "heal-info", status); - if (ret) { - gf_msg (THIS->name, GF_LOG_WARNING, -ret, - EC_MSG_HEAL_FAIL, - "Failed to set heal-info key to " - "%s", status); - dict_unref(dict); - dict = NULL; - } - *dict_rsp = dict; + dict = dict_new(); + if (!dict) { + ret = -ENOMEM; + goto out; + } + ret = dict_set_str(dict, "heal-info", status); + if (ret) { + gf_msg(THIS->name, GF_LOG_WARNING, -ret, EC_MSG_HEAL_FAIL, + "Failed to set heal-info key to " + "%s", + status); + dict_unref(dict); + dict = NULL; + } + *dict_rsp = dict; out: - return ret; + return ret; } static int32_t -_need_heal_calculate (ec_t *ec, uint64_t *dirty, unsigned char *sources, - gf_boolean_t self_locked, int32_t lock_count, - ec_heal_need_t *need_heal) -{ - int i = 0; - int source_count = 0; - - source_count = EC_COUNT (sources, ec->nodes); - if (source_count == ec->nodes) { - *need_heal = EC_HEAL_NONEED; - if (self_locked || lock_count == 0) { - for (i = 0; i < ec->nodes; i++) { - if (dirty[i]) { - *need_heal = EC_HEAL_MUST; - goto out; - } - } - } else { - for (i = 0; i < ec->nodes; i++) { - /* Since each lock can only increment the dirty - * count once, if dirty is > 1 it means that - * another operation has left the dirty count - * set and this indicates a problem in the - * inode.*/ - if (dirty[i] > 1) { - *need_heal = EC_HEAL_MUST; - goto out; - } - } +_need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, + gf_boolean_t self_locked, int32_t lock_count, + ec_heal_need_t *need_heal) +{ + int i = 0; + int source_count = 0; + + source_count = EC_COUNT(sources, ec->nodes); + if (source_count == ec->nodes) { + *need_heal = EC_HEAL_NONEED; + if (self_locked || lock_count == 0) { + for (i = 0; i < ec->nodes; i++) { + if (dirty[i]) { + *need_heal = EC_HEAL_MUST; + goto out; } + } } else { - *need_heal = EC_HEAL_MUST; + for (i = 0; i < ec->nodes; i++) { + /* Since each lock can only increment the dirty + * count once, if dirty is > 1 it means that + * another operation has left the dirty count + * set and this indicates a problem in the + * inode.*/ + if (dirty[i] > 1) { + *need_heal = EC_HEAL_MUST; + goto out; + } + } } + } else { + *need_heal = 
EC_HEAL_MUST; + } out: - return source_count; + return source_count; } static int32_t -ec_need_metadata_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies, - int32_t lock_count, gf_boolean_t self_locked, - gf_boolean_t thorough, ec_heal_need_t *need_heal) -{ - uint64_t *dirty = NULL; - unsigned char *sources = NULL; - unsigned char *healed_sinks = NULL; - uint64_t *meta_versions = NULL; - int ret = 0; - int i = 0; - - sources = alloca0(ec->nodes); - healed_sinks = alloca0(ec->nodes); - dirty = alloca0 (ec->nodes * sizeof (*dirty)); - meta_versions = alloca0 (ec->nodes * sizeof (*meta_versions)); - ret = ec_heal_metadata_find_direction (ec, replies, meta_versions, - dirty, sources, healed_sinks); - if (ret < 0 && ret != -EIO) { +ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, + int32_t lock_count, gf_boolean_t self_locked, + gf_boolean_t thorough, ec_heal_need_t *need_heal) +{ + uint64_t *dirty = NULL; + unsigned char *sources = NULL; + unsigned char *healed_sinks = NULL; + uint64_t *meta_versions = NULL; + int ret = 0; + int i = 0; + + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + dirty = alloca0(ec->nodes * sizeof(*dirty)); + meta_versions = alloca0(ec->nodes * sizeof(*meta_versions)); + ret = ec_heal_metadata_find_direction(ec, replies, meta_versions, dirty, + sources, healed_sinks); + if (ret < 0 && ret != -EIO) { + goto out; + } + + ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count, + need_heal); + if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) { + for (i = 1; i < ec->nodes; i++) { + if (meta_versions[i] != meta_versions[0]) { + *need_heal = EC_HEAL_MUST; goto out; + } } + } +out: + return ret; +} - ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count, - need_heal); - if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) { - for (i = 1; i < ec->nodes; i++) { - if (meta_versions[i] != meta_versions[0]) { - *need_heal = EC_HEAL_MUST; - goto out; - } - } - } +static int32_t +ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, + int32_t lock_count, gf_boolean_t self_locked, + gf_boolean_t thorough, ec_heal_need_t *need_heal) +{ + uint64_t *dirty = NULL; + unsigned char *sources = NULL; + unsigned char *healed_sinks = NULL; + uint64_t *data_versions = NULL; + uint64_t *size = NULL; + int ret = 0; + + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + dirty = alloca0(ec->nodes * sizeof(*dirty)); + data_versions = alloca0(ec->nodes * sizeof(*data_versions)); + size = alloca0(ec->nodes * sizeof(*size)); + + /* When dd is going on and heal info is called there is a very good + * chance for on disk sizes to mismatch even though nothing is wrong + * we don't need ondisk size check there. But if the file is either + * self-locked or the caller wants a thorough check then make sure to + * perform on disk check also. 
*/ + ret = ec_heal_data_find_direction( + ec, replies, data_versions, dirty, size, sources, healed_sinks, + self_locked || thorough, EC_COMBINE_XDATA); + if (ret < 0 && ret != -EIO) { + goto out; + } + + ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count, + need_heal); out: - return ret; + return ret; } static int32_t -ec_need_data_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies, +ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, int32_t lock_count, gf_boolean_t self_locked, gf_boolean_t thorough, ec_heal_need_t *need_heal) { - uint64_t *dirty = NULL; - unsigned char *sources = NULL; - unsigned char *healed_sinks = NULL; - uint64_t *data_versions = NULL; - uint64_t *size = NULL; - int ret = 0; - - sources = alloca0(ec->nodes); - healed_sinks = alloca0(ec->nodes); - dirty = alloca0 (ec->nodes * sizeof (*dirty)); - data_versions = alloca0 (ec->nodes * sizeof (*data_versions)); - size = alloca0 (ec->nodes * sizeof (*size)); - - /* When dd is going on and heal info is called there is a very good - * chance for on disk sizes to mismatch even though nothing is wrong - * we don't need ondisk size check there. But if the file is either - * self-locked or the caller wants a thorough check then make sure to - * perform on disk check also. */ - ret = ec_heal_data_find_direction (ec, replies, data_versions, - dirty, size, sources, healed_sinks, - self_locked || thorough, - EC_COMBINE_XDATA); - if (ret < 0 && ret != -EIO) { - goto out; - } + uint64_t *dirty = NULL; + unsigned char *sources = NULL; + unsigned char *healed_sinks = NULL; + uint64_t *data_versions = NULL; + int ret = 0; - ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count, - need_heal); -out: - return ret; -} + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + dirty = alloca0(ec->nodes * sizeof(*dirty)); + data_versions = alloca0(ec->nodes * sizeof(*data_versions)); -static int32_t -ec_need_entry_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies, - int32_t lock_count, gf_boolean_t self_locked, - gf_boolean_t thorough, ec_heal_need_t *need_heal) -{ - uint64_t *dirty = NULL; - unsigned char *sources = NULL; - unsigned char *healed_sinks = NULL; - uint64_t *data_versions = NULL; - int ret = 0; - - sources = alloca0(ec->nodes); - healed_sinks = alloca0(ec->nodes); - dirty = alloca0 (ec->nodes * sizeof (*dirty)); - data_versions = alloca0 (ec->nodes * sizeof (*data_versions)); - - ret = ec_heal_entry_find_direction (ec, replies, data_versions, - dirty, sources, healed_sinks); - if (ret < 0 && ret != -EIO) { - goto out; - } + ret = ec_heal_entry_find_direction(ec, replies, data_versions, dirty, + sources, healed_sinks); + if (ret < 0 && ret != -EIO) { + goto out; + } - ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count, - need_heal); + ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count, + need_heal); out: - return ret; + return ret; } static int32_t -ec_need_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies, - int32_t lock_count, gf_boolean_t self_locked, - gf_boolean_t thorough, ec_heal_need_t *need_heal) +ec_need_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, + int32_t lock_count, gf_boolean_t self_locked, + gf_boolean_t thorough, ec_heal_need_t *need_heal) { - int ret = 0; - + int ret = 0; - ret = ec_need_metadata_heal (ec, inode, replies, lock_count, - self_locked, thorough, need_heal); - if (ret < 0) - goto out; + ret = ec_need_metadata_heal(ec, inode, replies, lock_count, 
self_locked, + thorough, need_heal); + if (ret < 0) + goto out; - if (*need_heal == EC_HEAL_MUST) - goto out; + if (*need_heal == EC_HEAL_MUST) + goto out; - if (inode->ia_type == IA_IFREG) { - ret = ec_need_data_heal (ec, inode, replies, lock_count, - self_locked, thorough, need_heal); - } else if (inode->ia_type == IA_IFDIR) { - ret = ec_need_entry_heal (ec, inode, replies, lock_count, - self_locked, thorough, need_heal); - } + if (inode->ia_type == IA_IFREG) { + ret = ec_need_data_heal(ec, inode, replies, lock_count, self_locked, + thorough, need_heal); + } else if (inode->ia_type == IA_IFDIR) { + ret = ec_need_entry_heal(ec, inode, replies, lock_count, self_locked, + thorough, need_heal); + } out: - return ret; + return ret; } int32_t -ec_heal_inspect (call_frame_t *frame, ec_t *ec, - inode_t *inode, unsigned char *locked_on, - gf_boolean_t self_locked, gf_boolean_t thorough, - ec_heal_need_t *need_heal) -{ - loc_t loc = {0}; - int i = 0; - int ret = 0; - dict_t *xdata = NULL; - uint64_t zero_array[2] = {0}; - uint64_t zero_value = 0; - unsigned char *output = NULL; - default_args_cbk_t *replies = NULL; - int32_t lock_count = 0; - - EC_REPLIES_ALLOC (replies, ec->nodes); - output = alloca0 (ec->nodes); - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - xdata = dict_new (); - if (!xdata || - dict_set_static_bin (xdata, EC_XATTR_VERSION, zero_array, - sizeof (zero_array)) || - dict_set_static_bin (xdata, EC_XATTR_DIRTY, zero_array, - sizeof (zero_array)) || - dict_set_static_bin (xdata, EC_XATTR_SIZE, &zero_value, - sizeof (zero_value))) { - ret = -ENOMEM; - goto out; - } +ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *locked_on, gf_boolean_t self_locked, + gf_boolean_t thorough, ec_heal_need_t *need_heal) +{ + loc_t loc = {0}; + int i = 0; + int ret = 0; + dict_t *xdata = NULL; + uint64_t zero_array[2] = {0}; + uint64_t zero_value = 0; + unsigned char *output = NULL; + default_args_cbk_t *replies = NULL; + int32_t lock_count = 0; + + EC_REPLIES_ALLOC(replies, ec->nodes); + output = alloca0(ec->nodes); + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + xdata = dict_new(); + if (!xdata || + dict_set_static_bin(xdata, EC_XATTR_VERSION, zero_array, + sizeof(zero_array)) || + dict_set_static_bin(xdata, EC_XATTR_DIRTY, zero_array, + sizeof(zero_array)) || + dict_set_static_bin(xdata, EC_XATTR_SIZE, &zero_value, + sizeof(zero_value))) { + ret = -ENOMEM; + goto out; + } - if (!self_locked) { - ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT, - ec->xl->name); - if (ret) { - ret = -ENOMEM; - goto out; - } + if (!self_locked) { + ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT, ec->xl->name); + if (ret) { + ret = -ENOMEM; + goto out; } + } - ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies, - output, frame, ec->xl, &loc, xdata); + ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output, + frame, ec->xl, &loc, xdata); - if (ret != ec->nodes) { - ret = ec->nodes; - *need_heal = EC_HEAL_MUST; - goto out; - } + if (ret != ec->nodes) { + ret = ec->nodes; + *need_heal = EC_HEAL_MUST; + goto out; + } - if (self_locked) - goto need_heal; + if (self_locked) + goto need_heal; - for (i = 0; i < ec->nodes; i++) { - if (!output[i] || !replies[i].xdata) { - continue; - } - if ((dict_get_int32 (replies[i].xdata, GLUSTERFS_INODELK_COUNT, - &lock_count) == 0) && lock_count > 0) { - break; - } + for (i = 0; i < ec->nodes; i++) { + if (!output[i] || !replies[i].xdata) { + continue; } + 
if ((dict_get_int32(replies[i].xdata, GLUSTERFS_INODELK_COUNT, + &lock_count) == 0) && + lock_count > 0) { + break; + } + } need_heal: - ret = ec_need_heal (ec, inode, replies, lock_count, - self_locked, thorough, need_heal); + ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough, + need_heal); - if (!self_locked && *need_heal == EC_HEAL_MUST) { - *need_heal = EC_HEAL_MAYBE; - } + if (!self_locked && *need_heal == EC_HEAL_MUST) { + *need_heal = EC_HEAL_MAYBE; + } out: - cluster_replies_wipe (replies, ec->nodes); - loc_wipe (&loc); - if (xdata) { - dict_unref(xdata); - } - return ret; + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + if (xdata) { + dict_unref(xdata); + } + return ret; } int32_t -ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode, - ec_heal_need_t *need_heal) -{ - unsigned char *locked_on = NULL; - unsigned char *up_subvols = NULL; - unsigned char *output = NULL; - default_args_cbk_t *replies = NULL; - int ret = 0; - - EC_REPLIES_ALLOC (replies, ec->nodes); - locked_on = alloca0(ec->nodes); - output = alloca0(ec->nodes); - up_subvols = alloca0(ec->nodes); - ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); - - ret = cluster_inodelk (ec->xl_list, up_subvols, ec->nodes, - replies, locked_on, frame, ec->xl, - ec->xl->name, inode, 0, 0); - if (ret != ec->nodes) { - *need_heal = EC_HEAL_MUST; - goto unlock; - } - ret = ec_heal_inspect (frame, ec, inode, locked_on, _gf_true, _gf_true, - need_heal); +ec_heal_locked_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode, + ec_heal_need_t *need_heal) +{ + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; + unsigned char *output = NULL; + default_args_cbk_t *replies = NULL; + int ret = 0; + + EC_REPLIES_ALLOC(replies, ec->nodes); + locked_on = alloca0(ec->nodes); + output = alloca0(ec->nodes); + up_subvols = alloca0(ec->nodes); + ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes); + + ret = cluster_inodelk(ec->xl_list, up_subvols, ec->nodes, replies, + locked_on, frame, ec->xl, ec->xl->name, inode, 0, 0); + if (ret != ec->nodes) { + *need_heal = EC_HEAL_MUST; + goto unlock; + } + ret = ec_heal_inspect(frame, ec, inode, locked_on, _gf_true, _gf_true, + need_heal); unlock: - cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, - replies, output, frame, ec->xl, - ec->xl->name, inode, 0, 0); - cluster_replies_wipe (replies, ec->nodes); - return ret; + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, + ec->xl, ec->xl->name, inode, 0, 0); + cluster_replies_wipe(replies, ec->nodes); + return ret; } int32_t -ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp) +ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp) { - int ret = -ENOMEM; - ec_heal_need_t need_heal = EC_HEAL_NONEED; - call_frame_t *frame = NULL; - ec_t *ec = NULL; - unsigned char *up_subvols = NULL; - loc_t loc = {0, }; + int ret = -ENOMEM; + ec_heal_need_t need_heal = EC_HEAL_NONEED; + call_frame_t *frame = NULL; + ec_t *ec = NULL; + unsigned char *up_subvols = NULL; + loc_t loc = { + 0, + }; - VALIDATE_OR_GOTO(this, out); - GF_VALIDATE_OR_GOTO(this->name, entry_loc, out); - - ec = this->private; - up_subvols = alloca0(ec->nodes); - ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, entry_loc, out); - if (EC_COUNT (up_subvols, ec->nodes) != ec->nodes) { - need_heal = EC_HEAL_MUST; - goto set_heal; - } - frame = create_frame (this, this->ctx->pool); - if (!frame) 
{ - goto out; - } - ec_owner_set(frame, frame->root); - frame->root->uid = 0; - frame->root->gid = 0; - frame->root->pid = GF_CLIENT_PID_SELF_HEALD; - - if (loc_copy(&loc, entry_loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, - ENOMEM, EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); - goto out; - } - if (!loc.inode) { - ret = syncop_inode_find (this, this, loc.gfid, - &loc.inode, NULL, NULL); - if (ret < 0) - goto out; - } + ec = this->private; + up_subvols = alloca0(ec->nodes); + ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes); - ret = ec_heal_inspect (frame, ec, loc.inode, up_subvols, _gf_false, - _gf_false, &need_heal); - if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) { - goto set_heal; - } - need_heal = EC_HEAL_NONEED; - ret = ec_heal_locked_inspect (frame, ec, loc.inode, - &need_heal); + if (EC_COUNT(up_subvols, ec->nodes) != ec->nodes) { + need_heal = EC_HEAL_MUST; + goto set_heal; + } + frame = create_frame(this, this->ctx->pool); + if (!frame) { + goto out; + } + ec_owner_set(frame, frame->root); + frame->root->uid = 0; + frame->root->gid = 0; + frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + + if (loc_copy(&loc, entry_loc) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); + goto out; + } + if (!loc.inode) { + ret = syncop_inode_find(this, this, loc.gfid, &loc.inode, NULL, NULL); if (ret < 0) - goto out; + goto out; + } + + ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false, + _gf_false, &need_heal); + if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) { + goto set_heal; + } + need_heal = EC_HEAL_NONEED; + ret = ec_heal_locked_inspect(frame, ec, loc.inode, &need_heal); + if (ret < 0) + goto out; set_heal: - if (need_heal == EC_HEAL_MUST) { - ret = ec_set_heal_info (dict_rsp, "heal"); - } else { - ret = ec_set_heal_info (dict_rsp, "no-heal"); - } + if (need_heal == EC_HEAL_MUST) { + ret = ec_set_heal_info(dict_rsp, "heal"); + } else { + ret = ec_set_heal_info(dict_rsp, "no-heal"); + } out: - if (frame) { - STACK_DESTROY (frame->root); - } - loc_wipe (&loc); - return ret; + if (frame) { + STACK_DESTROY(frame->root); + } + loc_wipe(&loc); + return ret; } diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index f36901ef74f..cc1062dd353 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -19,559 +19,545 @@ #include "syncop-utils.h" #include "protocol-common.h" - -#define NTH_INDEX_HEALER(this, n) (&((((ec_t *)this->private))->shd.index_healers[n])) -#define NTH_FULL_HEALER(this, n) (&((((ec_t *)this->private))->shd.full_healers[n])) +#define NTH_INDEX_HEALER(this, n) \ + (&((((ec_t *)this->private))->shd.index_healers[n])) +#define NTH_FULL_HEALER(this, n) \ + (&((((ec_t *)this->private))->shd.full_healers[n])) gf_boolean_t -ec_shd_is_subvol_local (xlator_t *this, int subvol) +ec_shd_is_subvol_local(xlator_t *this, int subvol) { - ec_t *ec = NULL; - gf_boolean_t is_local = _gf_false; - loc_t loc = {0, }; - - ec = this->private; - loc.inode = this->itable->root; - syncop_is_subvol_local (ec->xl_list[subvol], &loc, &is_local); - return is_local; + ec_t *ec = NULL; + gf_boolean_t is_local = _gf_false; + loc_t loc = { + 0, + }; + + ec = this->private; + loc.inode = this->itable->root; + syncop_is_subvol_local(ec->xl_list[subvol], &loc, &is_local); + return is_local; } char * -ec_subvol_name (xlator_t *this, int subvol) +ec_subvol_name(xlator_t *this, int subvol) { - ec_t *ec = NULL; + ec_t *ec = NULL; - ec = this->private; - if 
(subvol < 0 || subvol > ec->nodes) - return NULL; + ec = this->private; + if (subvol < 0 || subvol > ec->nodes) + return NULL; - return ec->xl_list[subvol]->name; + return ec->xl_list[subvol]->name; } int -__ec_shd_healer_wait (struct subvol_healer *healer) +__ec_shd_healer_wait(struct subvol_healer *healer) { - ec_t *ec = NULL; - struct timespec wait_till = {0, }; - int ret = 0; + ec_t *ec = NULL; + struct timespec wait_till = { + 0, + }; + int ret = 0; - ec = healer->this->private; + ec = healer->this->private; disabled_loop: - wait_till.tv_sec = time (NULL) + 60; - - while (!healer->rerun) { - ret = pthread_cond_timedwait (&healer->cond, - &healer->mutex, - &wait_till); - if (ret == ETIMEDOUT) - break; - } + wait_till.tv_sec = time(NULL) + 60; - ret = healer->rerun; - healer->rerun = 0; + while (!healer->rerun) { + ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); + if (ret == ETIMEDOUT) + break; + } - if (!ec->shd.enabled || !ec->up) - goto disabled_loop; + ret = healer->rerun; + healer->rerun = 0; - return ret; -} + if (!ec->shd.enabled || !ec->up) + goto disabled_loop; + return ret; +} int -ec_shd_healer_wait (struct subvol_healer *healer) +ec_shd_healer_wait(struct subvol_healer *healer) { - int ret = 0; + int ret = 0; - pthread_mutex_lock (&healer->mutex); - { - ret = __ec_shd_healer_wait (healer); - } - pthread_mutex_unlock (&healer->mutex); + pthread_mutex_lock(&healer->mutex); + { + ret = __ec_shd_healer_wait(healer); + } + pthread_mutex_unlock(&healer->mutex); - return ret; + return ret; } int -ec_shd_index_inode (xlator_t *this, xlator_t *subvol, inode_t **inode) +ec_shd_index_inode(xlator_t *this, xlator_t *subvol, inode_t **inode) { - loc_t rootloc = {0, }; - int ret = 0; - dict_t *xattr = NULL; - void *index_gfid = NULL; - - *inode = NULL; - rootloc.inode = inode_ref (this->itable->root); - gf_uuid_copy (rootloc.gfid, rootloc.inode->gfid); - - ret = syncop_getxattr (subvol, &rootloc, &xattr, - GF_XATTROP_INDEX_GFID, NULL, NULL); - if (ret < 0) - goto out; - if (!xattr) { - ret = -EINVAL; - goto out; - } - - ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid); - if (ret) - goto out; - - gf_msg_debug (this->name, 0, "index-dir gfid for %s: %s", - subvol->name, uuid_utoa (index_gfid)); - - ret = syncop_inode_find (this, subvol, index_gfid, - inode, NULL, NULL); + loc_t rootloc = { + 0, + }; + int ret = 0; + dict_t *xattr = NULL; + void *index_gfid = NULL; + + *inode = NULL; + rootloc.inode = inode_ref(this->itable->root); + gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid); + + ret = syncop_getxattr(subvol, &rootloc, &xattr, GF_XATTROP_INDEX_GFID, NULL, + NULL); + if (ret < 0) + goto out; + if (!xattr) { + ret = -EINVAL; + goto out; + } + + ret = dict_get_ptr(xattr, GF_XATTROP_INDEX_GFID, &index_gfid); + if (ret) + goto out; + + gf_msg_debug(this->name, 0, "index-dir gfid for %s: %s", subvol->name, + uuid_utoa(index_gfid)); + + ret = syncop_inode_find(this, subvol, index_gfid, inode, NULL, NULL); out: - loc_wipe (&rootloc); + loc_wipe(&rootloc); - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); - return ret; + return ret; } int -ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name) +ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name) { - loc_t loc = {0, }; - int ret = 0; + loc_t loc = { + 0, + }; + int ret = 0; - loc.parent = inode_ref (inode); - loc.name = name; + loc.parent = inode_ref(inode); + loc.name = name; - ret = syncop_unlink (subvol, &loc, NULL, NULL); + ret = syncop_unlink(subvol, &loc, 
NULL, NULL); - loc_wipe (&loc); - return ret; + loc_wipe(&loc); + return ret; } int -ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc) +ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc) { - int32_t ret; - - ret = syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL, - NULL); - if ((ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { - /* If we have just healed a directory, it's possible that - * other index entries have appeared to be healed. We put a - * mark so that we can check it later and restart a scan - * without delay. */ - healer->rerun = _gf_true; - } - - return ret; + int32_t ret; + + ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL); + if ((ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { + /* If we have just healed a directory, it's possible that + * other index entries have appeared to be healed. We put a + * mark so that we can check it later and restart a scan + * without delay. */ + healer->rerun = _gf_true; + } + + return ret; } - int -ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, - void *data) +ec_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) { - struct subvol_healer *healer = data; - ec_t *ec = NULL; - loc_t loc = {0}; - int ret = 0; - - ec = healer->this->private; - if (ec->xl_up_count <= ec->fragments) { - return -ENOTCONN; - } - if (!ec->shd.enabled) - return -EBUSY; - - gf_msg_debug (healer->this->name, 0, "got entry: %s", - entry->d_name); - - ret = gf_uuid_parse (entry->d_name, loc.gfid); - if (ret) - return 0; + struct subvol_healer *healer = data; + ec_t *ec = NULL; + loc_t loc = {0}; + int ret = 0; + + ec = healer->this->private; + if (ec->xl_up_count <= ec->fragments) { + return -ENOTCONN; + } + if (!ec->shd.enabled) + return -EBUSY; + + gf_msg_debug(healer->this->name, 0, "got entry: %s", entry->d_name); + + ret = gf_uuid_parse(entry->d_name, loc.gfid); + if (ret) + return 0; - /* If this fails with ENOENT/ESTALE index is stale */ - ret = syncop_gfid_to_path (healer->this->itable, subvol, loc.gfid, - (char **)&loc.path); - if (ret < 0) - goto out; + /* If this fails with ENOENT/ESTALE index is stale */ + ret = syncop_gfid_to_path(healer->this->itable, subvol, loc.gfid, + (char **)&loc.path); + if (ret < 0) + goto out; - ret = syncop_inode_find (healer->this, healer->this, loc.gfid, - &loc.inode, NULL, NULL); - if (ret < 0) - goto out; + ret = syncop_inode_find(healer->this, healer->this, loc.gfid, &loc.inode, + NULL, NULL); + if (ret < 0) + goto out; - ec_shd_selfheal (healer, healer->subvol, &loc); + ec_shd_selfheal(healer, healer->subvol, &loc); out: - if (ret == -ENOENT || ret == -ESTALE) { - gf_msg (healer->this->name, GF_LOG_DEBUG, 0, - EC_MSG_HEAL_FAIL, "Purging index for gfid %s:", - uuid_utoa(loc.gfid)); - ec_shd_index_purge (subvol, parent->inode, entry->d_name); - } - loc_wipe (&loc); - - return 0; + if (ret == -ENOENT || ret == -ESTALE) { + gf_msg(healer->this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Purging index for gfid %s:", uuid_utoa(loc.gfid)); + ec_shd_index_purge(subvol, parent->inode, entry->d_name); + } + loc_wipe(&loc); + + return 0; } int -ec_shd_index_sweep (struct subvol_healer *healer) +ec_shd_index_sweep(struct subvol_healer *healer) { - loc_t loc = {0}; - ec_t *ec = NULL; - int ret = 0; - xlator_t *subvol = NULL; - dict_t *xdata = NULL; - - ec = healer->this->private; - subvol = ec->xl_list[healer->subvol]; - - ret = ec_shd_index_inode (healer->this, subvol, &loc.inode); - if (ret < 0) { - gf_msg 
(healer->this->name, GF_LOG_WARNING, errno, - EC_MSG_INDEX_DIR_GET_FAIL, - "unable to get index-dir on %s", subvol->name); - goto out; - } - - xdata = dict_new (); - if (!xdata || dict_set_int32 (xdata, "get-gfid-type", 1)) { - ret = -ENOMEM; - goto out; - } - - ret = syncop_mt_dir_scan (NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, - healer, ec_shd_index_heal, xdata, - ec->shd.max_threads, ec->shd.wait_qlength); + loc_t loc = {0}; + ec_t *ec = NULL; + int ret = 0; + xlator_t *subvol = NULL; + dict_t *xdata = NULL; + + ec = healer->this->private; + subvol = ec->xl_list[healer->subvol]; + + ret = ec_shd_index_inode(healer->this, subvol, &loc.inode); + if (ret < 0) { + gf_msg(healer->this->name, GF_LOG_WARNING, errno, + EC_MSG_INDEX_DIR_GET_FAIL, "unable to get index-dir on %s", + subvol->name); + goto out; + } + + xdata = dict_new(); + if (!xdata || dict_set_int32(xdata, "get-gfid-type", 1)) { + ret = -ENOMEM; + goto out; + } + + ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, + healer, ec_shd_index_heal, xdata, + ec->shd.max_threads, ec->shd.wait_qlength); out: - if (xdata) - dict_unref (xdata); - loc_wipe (&loc); + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); - return ret; + return ret; } int -ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, - void *data) +ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) { - struct subvol_healer *healer = data; - xlator_t *this = healer->this; - ec_t *ec = NULL; - loc_t loc = {0}; - int ret = 0; - - ec = this->private; - if (ec->xl_up_count <= ec->fragments) { - return -ENOTCONN; - } - if (!ec->shd.enabled) - return -EBUSY; - - if (gf_uuid_is_null(entry->d_stat.ia_gfid)) { - /* It's possible that an entry has been removed just after - * being seen in a directory but before getting its stat info. - * In this case we'll receive a NULL gfid here. Since the file - * doesn't exist anymore, we can safely ignore it. */ - return 0; - } + struct subvol_healer *healer = data; + xlator_t *this = healer->this; + ec_t *ec = NULL; + loc_t loc = {0}; + int ret = 0; + + ec = this->private; + if (ec->xl_up_count <= ec->fragments) { + return -ENOTCONN; + } + if (!ec->shd.enabled) + return -EBUSY; + + if (gf_uuid_is_null(entry->d_stat.ia_gfid)) { + /* It's possible that an entry has been removed just after + * being seen in a directory but before getting its stat info. + * In this case we'll receive a NULL gfid here. Since the file + * doesn't exist anymore, we can safely ignore it. 
*/ + return 0; + } - loc.parent = inode_ref (parent->inode); - loc.name = entry->d_name; - gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid); + loc.parent = inode_ref(parent->inode); + loc.name = entry->d_name; + gf_uuid_copy(loc.gfid, entry->d_stat.ia_gfid); - /* If this fails with ENOENT/ESTALE index is stale */ - ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid, - (char **)&loc.path); - if (ret < 0) - goto out; + /* If this fails with ENOENT/ESTALE index is stale */ + ret = syncop_gfid_to_path(this->itable, subvol, loc.gfid, + (char **)&loc.path); + if (ret < 0) + goto out; - ret = syncop_inode_find (this, this, loc.gfid, - &loc.inode, NULL, NULL); - if (ret < 0) - goto out; + ret = syncop_inode_find(this, this, loc.gfid, &loc.inode, NULL, NULL); + if (ret < 0) + goto out; - ec_shd_selfheal (healer, healer->subvol, &loc); + ec_shd_selfheal(healer, healer->subvol, &loc); - ret = 0; + ret = 0; out: - loc_wipe (&loc); - return ret; + loc_wipe(&loc); + return ret; } int -ec_shd_full_sweep (struct subvol_healer *healer, inode_t *inode) +ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode) { - ec_t *ec = NULL; - loc_t loc = {0}; - - ec = healer->this->private; - loc.inode = inode; - return syncop_ftw (ec->xl_list[healer->subvol], &loc, - GF_CLIENT_PID_SELF_HEALD, healer, - ec_shd_full_heal); -} + ec_t *ec = NULL; + loc_t loc = {0}; + ec = healer->this->private; + loc.inode = inode; + return syncop_ftw(ec->xl_list[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal); +} void * -ec_shd_index_healer (void *data) +ec_shd_index_healer(void *data) { - struct subvol_healer *healer = NULL; - xlator_t *this = NULL; - - healer = data; - THIS = this = healer->this; - ec_t *ec = this->private; - - for (;;) { - ec_shd_healer_wait (healer); - - if (ec->xl_up_count > ec->fragments) { - gf_msg_debug (this->name, 0, - "starting index sweep on subvol %s", - ec_subvol_name (this, healer->subvol)); - ec_shd_index_sweep (healer); - } - gf_msg_debug (this->name, 0, - "finished index sweep on subvol %s", - ec_subvol_name (this, healer->subvol)); + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + + healer = data; + THIS = this = healer->this; + ec_t *ec = this->private; + + for (;;) { + ec_shd_healer_wait(healer); + + if (ec->xl_up_count > ec->fragments) { + gf_msg_debug(this->name, 0, "starting index sweep on subvol %s", + ec_subvol_name(this, healer->subvol)); + ec_shd_index_sweep(healer); } + gf_msg_debug(this->name, 0, "finished index sweep on subvol %s", + ec_subvol_name(this, healer->subvol)); + } - return NULL; + return NULL; } - void * -ec_shd_full_healer (void *data) +ec_shd_full_healer(void *data) { - struct subvol_healer *healer = NULL; - xlator_t *this = NULL; - loc_t rootloc = {0}; - - int run = 0; - - healer = data; - THIS = this = healer->this; - ec_t *ec = this->private; - - rootloc.inode = this->itable->root; - for (;;) { - pthread_mutex_lock (&healer->mutex); - { - run = __ec_shd_healer_wait (healer); - if (!run) - healer->running = _gf_false; - } - pthread_mutex_unlock (&healer->mutex); - - if (!run) - break; - - if (ec->xl_up_count > ec->fragments) { - gf_msg (this->name, GF_LOG_INFO, 0, - EC_MSG_FULL_SWEEP_START, - "starting full sweep on subvol %s", - ec_subvol_name (this, healer->subvol)); - - ec_shd_selfheal (healer, healer->subvol, &rootloc); - ec_shd_full_sweep (healer, this->itable->root); - } - - gf_msg (this->name, GF_LOG_INFO, 0, - EC_MSG_FULL_SWEEP_STOP, - "finished full sweep on subvol %s", - ec_subvol_name (this, 
healer->subvol)); + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + loc_t rootloc = {0}; + + int run = 0; + + healer = data; + THIS = this = healer->this; + ec_t *ec = this->private; + + rootloc.inode = this->itable->root; + for (;;) { + pthread_mutex_lock(&healer->mutex); + { + run = __ec_shd_healer_wait(healer); + if (!run) + healer->running = _gf_false; } + pthread_mutex_unlock(&healer->mutex); - return NULL; -} + if (!run) + break; + + if (ec->xl_up_count > ec->fragments) { + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START, + "starting full sweep on subvol %s", + ec_subvol_name(this, healer->subvol)); + + ec_shd_selfheal(healer, healer->subvol, &rootloc); + ec_shd_full_sweep(healer, this->itable->root); + } + + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_STOP, + "finished full sweep on subvol %s", + ec_subvol_name(this, healer->subvol)); + } + return NULL; +} int -ec_shd_healer_init (xlator_t *this, struct subvol_healer *healer) +ec_shd_healer_init(xlator_t *this, struct subvol_healer *healer) { - int ret = 0; + int ret = 0; - ret = pthread_mutex_init (&healer->mutex, NULL); - if (ret) - goto out; + ret = pthread_mutex_init(&healer->mutex, NULL); + if (ret) + goto out; - ret = pthread_cond_init (&healer->cond, NULL); - if (ret) - goto out; + ret = pthread_cond_init(&healer->cond, NULL); + if (ret) + goto out; - healer->this = this; - healer->running = _gf_false; - healer->rerun = _gf_false; + healer->this = this; + healer->running = _gf_false; + healer->rerun = _gf_false; out: - return ret; + return ret; } - int -ec_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer, - void *(threadfn)(void *)) +ec_shd_healer_spawn(xlator_t *this, struct subvol_healer *healer, + void *(threadfn)(void *)) { - int ret = 0; - - pthread_mutex_lock (&healer->mutex); - { - if (healer->running) { - pthread_cond_signal (&healer->cond); - } else { - ret = gf_thread_create (&healer->thread, NULL, - threadfn, healer, "ecshd"); - if (ret) - goto unlock; - healer->running = 1; - } - - healer->rerun = 1; + int ret = 0; + + pthread_mutex_lock(&healer->mutex); + { + if (healer->running) { + pthread_cond_signal(&healer->cond); + } else { + ret = gf_thread_create(&healer->thread, NULL, threadfn, healer, + "ecshd"); + if (ret) + goto unlock; + healer->running = 1; } + + healer->rerun = 1; + } unlock: - pthread_mutex_unlock (&healer->mutex); + pthread_mutex_unlock(&healer->mutex); - return ret; + return ret; } int -ec_shd_full_healer_spawn (xlator_t *this, int subvol) +ec_shd_full_healer_spawn(xlator_t *this, int subvol) { - return ec_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol), - ec_shd_full_healer); + return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol), + ec_shd_full_healer); } - int -ec_shd_index_healer_spawn (xlator_t *this, int subvol) +ec_shd_index_healer_spawn(xlator_t *this, int subvol) { - return ec_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol), - ec_shd_index_healer); + return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol), + ec_shd_index_healer); } void ec_shd_index_healer_wake(ec_t *ec) { - int32_t i; + int32_t i; - for (i = 0; i < ec->nodes; i++) { - if (((ec->xl_up >> i) & 1) != 0) { - ec_shd_index_healer_spawn(ec->xl, i); - } + for (i = 0; i < ec->nodes; i++) { + if (((ec->xl_up >> i) & 1) != 0) { + ec_shd_index_healer_spawn(ec->xl, i); } + } } int -ec_selfheal_daemon_init (xlator_t *this) +ec_selfheal_daemon_init(xlator_t *this) { - ec_t *ec = NULL; - ec_self_heald_t *shd = NULL; - int ret = -1; - int i = 0; - - ec = 
this->private; - shd = &ec->shd; - - shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers), - ec->nodes, - ec_mt_subvol_healer_t); - if (!shd->index_healers) - goto out; - - for (i = 0; i < ec->nodes; i++) { - shd->index_healers[i].subvol = i; - ret = ec_shd_healer_init (this, &shd->index_healers[i]); - if (ret) - goto out; - } + ec_t *ec = NULL; + ec_self_heald_t *shd = NULL; + int ret = -1; + int i = 0; + + ec = this->private; + shd = &ec->shd; + + shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers), ec->nodes, + ec_mt_subvol_healer_t); + if (!shd->index_healers) + goto out; + + for (i = 0; i < ec->nodes; i++) { + shd->index_healers[i].subvol = i; + ret = ec_shd_healer_init(this, &shd->index_healers[i]); + if (ret) + goto out; + } - shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers), - ec->nodes, - ec_mt_subvol_healer_t); - if (!shd->full_healers) - goto out; - - for (i = 0; i < ec->nodes; i++) { - shd->full_healers[i].subvol = i; - ret = ec_shd_healer_init (this, &shd->full_healers[i]); - if (ret) - goto out; - } + shd->full_healers = GF_CALLOC(sizeof(*shd->full_healers), ec->nodes, + ec_mt_subvol_healer_t); + if (!shd->full_healers) + goto out; + + for (i = 0; i < ec->nodes; i++) { + shd->full_healers[i].subvol = i; + ret = ec_shd_healer_init(this, &shd->full_healers[i]); + if (ret) + goto out; + } - ret = 0; + ret = 0; out: - return ret; + return ret; } - int -ec_heal_op (xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id) +ec_heal_op(xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id) { - char key[64] = {0}; - int op_ret = 0; - ec_t *ec = NULL; - int i = 0; - GF_UNUSED int ret = 0; - - ec = this->private; - - op_ret = -1; - for (i = 0; i < ec->nodes; i++) { - snprintf (key, sizeof (key), "%d-%d-status", xl_id, i); - - if (((ec->xl_up >> i) & 1) == 0) { - ret = dict_set_str (output, key, "Brick is not connected"); - } else if (!ec->up) { - ret = dict_set_str (output, key, - "Disperse subvolume is not up"); - } else if (!ec_shd_is_subvol_local (this, i)) { - ret = dict_set_str (output, key, "Brick is remote"); - } else { - ret = dict_set_str (output, key, "Started self-heal"); - if (op == GF_SHD_OP_HEAL_FULL) { - ec_shd_full_healer_spawn (this, i); - } else if (op == GF_SHD_OP_HEAL_INDEX) { - ec_shd_index_healer_spawn (this, i); - } - op_ret = 0; - } + char key[64] = {0}; + int op_ret = 0; + ec_t *ec = NULL; + int i = 0; + GF_UNUSED int ret = 0; + + ec = this->private; + + op_ret = -1; + for (i = 0; i < ec->nodes; i++) { + snprintf(key, sizeof(key), "%d-%d-status", xl_id, i); + + if (((ec->xl_up >> i) & 1) == 0) { + ret = dict_set_str(output, key, "Brick is not connected"); + } else if (!ec->up) { + ret = dict_set_str(output, key, "Disperse subvolume is not up"); + } else if (!ec_shd_is_subvol_local(this, i)) { + ret = dict_set_str(output, key, "Brick is remote"); + } else { + ret = dict_set_str(output, key, "Started self-heal"); + if (op == GF_SHD_OP_HEAL_FULL) { + ec_shd_full_healer_spawn(this, i); + } else if (op == GF_SHD_OP_HEAL_INDEX) { + ec_shd_index_healer_spawn(this, i); + } + op_ret = 0; } - return op_ret; + } + return op_ret; } int -ec_xl_op (xlator_t *this, dict_t *input, dict_t *output) +ec_xl_op(xlator_t *this, dict_t *input, dict_t *output) { - gf_xl_afr_op_t op = GF_SHD_OP_INVALID; - int ret = 0; - int xl_id = 0; + gf_xl_afr_op_t op = GF_SHD_OP_INVALID; + int ret = 0; + int xl_id = 0; - ret = dict_get_int32 (input, "xl-op", (int32_t *)&op); - if (ret) - goto out; + ret = dict_get_int32(input, "xl-op", (int32_t *)&op); + if (ret) + 
goto out; - ret = dict_get_int32 (input, this->name, &xl_id); - if (ret) - goto out; + ret = dict_get_int32(input, this->name, &xl_id); + if (ret) + goto out; - ret = dict_set_int32 (output, this->name, xl_id); - if (ret) - goto out; + ret = dict_set_int32(output, this->name, xl_id); + if (ret) + goto out; - switch (op) { + switch (op) { case GF_SHD_OP_HEAL_FULL: - ret = ec_heal_op (this, output, op, xl_id); - break; + ret = ec_heal_op(this, output, op, xl_id); + break; case GF_SHD_OP_HEAL_INDEX: - ret = ec_heal_op (this, output, op, xl_id); - break; + ret = ec_heal_op(this, output, op, xl_id); + break; default: - ret = -1; - break; - } + ret = -1; + break; + } out: - dict_del (output, this->name); - return ret; + dict_del(output, this->name); + return ret; } diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index ff3558def86..dec39b9d2aa 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -19,25 +19,20 @@ #include "ec-method.h" #include "ec-helpers.h" -static const char * ec_fop_list[] = -{ - [-EC_FOP_HEAL] = "HEAL" -}; +static const char *ec_fop_list[] = {[-EC_FOP_HEAL] = "HEAL"}; -const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits) +const char * +ec_bin(char *str, size_t size, uint64_t value, int32_t digits) { str += size; - if (size-- < 1) - { + if (size-- < 1) { goto failed; } *--str = 0; - while ((value != 0) || (digits > 0)) - { - if (size-- < 1) - { + while ((value != 0) || (digits > 0)) { + if (size-- < 1) { goto failed; } *--str = '0' + (value & 1); @@ -51,21 +46,22 @@ failed: return ""; } -const char * ec_fop_name(int32_t id) +const char * +ec_fop_name(int32_t id) { - if (id >= 0) - { + if (id >= 0) { return gf_fop_list[id]; } return ec_fop_list[-id]; } -void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...) +void +ec_trace(const char *event, ec_fop_data_t *fop, const char *fmt, ...) { char str1[32], str2[32], str3[32]; - char * msg; - ec_t * ec = fop->xl->private; + char *msg; + ec_t *ec = fop->xl->private; va_list args; int32_t ret; @@ -73,28 +69,28 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...) 
ret = vasprintf(&msg, fmt, args); va_end(args); - if (ret < 0) - { + if (ret < 0) { msg = ""; } - gf_msg_trace ("ec", 0, "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] " - "frame=%p/%p, min/exp=%d/%d, err=%d state=%d " - "{%s:%s:%s} %s", - event, ec_fop_name(fop->id), fop, fop->parent, fop->refs, - fop->winds, fop->jobs, fop->req_frame, fop->frame, fop->minimum, - fop->expected, fop->error, fop->state, - ec_bin(str1, sizeof(str1), fop->mask, ec->nodes), - ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes), - ec_bin(str3, sizeof(str3), fop->good, ec->nodes), msg); + gf_msg_trace("ec", 0, + "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] " + "frame=%p/%p, min/exp=%d/%d, err=%d state=%d " + "{%s:%s:%s} %s", + event, ec_fop_name(fop->id), fop, fop->parent, fop->refs, + fop->winds, fop->jobs, fop->req_frame, fop->frame, + fop->minimum, fop->expected, fop->error, fop->state, + ec_bin(str1, sizeof(str1), fop->mask, ec->nodes), + ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes), + ec_bin(str3, sizeof(str3), fop->good, ec->nodes), msg); - if (ret >= 0) - { + if (ret >= 0) { free(msg); } } -int32_t ec_bits_consume(uint64_t * n) +int32_t +ec_bits_consume(uint64_t *n) { uint64_t tmp; @@ -105,21 +101,18 @@ int32_t ec_bits_consume(uint64_t * n) return gf_bits_index(tmp); } -size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count, - off_t offset, size_t size) +size_t +ec_iov_copy_to(void *dst, struct iovec *vector, int32_t count, off_t offset, + size_t size) { int32_t i = 0; size_t total = 0, len = 0; - while (i < count) - { - if (offset < vector[i].iov_len) - { - while ((i < count) && (size > 0)) - { + while (i < count) { + if (offset < vector[i].iov_len) { + while ((i < count) && (size > 0)) { len = size; - if (len > vector[i].iov_len - offset) - { + if (len > vector[i].iov_len - offset) { len = vector[i].iov_len - offset; } memcpy(dst, vector[i++].iov_base + offset, len); @@ -139,15 +132,15 @@ size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count, return total; } -int32_t ec_buffer_alloc(xlator_t *xl, size_t size, struct iobref **piobref, - void **ptr) +int32_t +ec_buffer_alloc(xlator_t *xl, size_t size, struct iobref **piobref, void **ptr) { struct iobref *iobref = NULL; struct iobuf *iobuf = NULL; int32_t ret = -ENOMEM; - iobuf = iobuf_get_page_aligned (xl->ctx->iobuf_pool, size, - EC_METHOD_WORD_SIZE); + iobuf = iobuf_get_page_aligned(xl->ctx->iobuf_pool, size, + EC_METHOD_WORD_SIZE); if (iobuf == NULL) { goto out; } @@ -186,12 +179,12 @@ out: return ret; } -int32_t ec_dict_set_array(dict_t *dict, char *key, uint64_t value[], - int32_t size) +int32_t +ec_dict_set_array(dict_t *dict, char *key, uint64_t value[], int32_t size) { - int ret = -1; - uint64_t *ptr = NULL; - int32_t vindex; + int ret = -1; + uint64_t *ptr = NULL; + int32_t vindex; if (value == NULL) { return -EINVAL; @@ -202,19 +195,18 @@ int32_t ec_dict_set_array(dict_t *dict, char *key, uint64_t value[], return -ENOMEM; } for (vindex = 0; vindex < size; vindex++) { - ptr[vindex] = hton64(value[vindex]); + ptr[vindex] = hton64(value[vindex]); } ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t) * size); if (ret) - GF_FREE (ptr); + GF_FREE(ptr); return ret; } - int32_t -ec_dict_get_array (dict_t *dict, char *key, uint64_t value[], int32_t size) +ec_dict_get_array(dict_t *dict, char *key, uint64_t value[], int32_t size) { - void *ptr; + void *ptr; int32_t len; int32_t vindex; int32_t old_size = 0; @@ -228,43 +220,43 @@ ec_dict_get_array (dict_t *dict, char *key, uint64_t value[], int32_t size) return err; 
} - if (len > (size * sizeof(uint64_t)) || (len % sizeof (uint64_t))) { + if (len > (size * sizeof(uint64_t)) || (len % sizeof(uint64_t))) { return -EINVAL; } /* 3.6 version ec would have stored version in 64 bit. In that case treat * metadata versions same as data*/ - old_size = min (size, len/sizeof(uint64_t)); + old_size = min(size, len / sizeof(uint64_t)); for (vindex = 0; vindex < old_size; vindex++) { - value[vindex] = ntoh64(*((uint64_t *)ptr + vindex)); + value[vindex] = ntoh64(*((uint64_t *)ptr + vindex)); } if (old_size < size) { - for (vindex = old_size; vindex < size; vindex++) { - value[vindex] = value[old_size-1]; - } + for (vindex = old_size; vindex < size; vindex++) { + value[vindex] = value[old_size - 1]; + } } return 0; } int32_t -ec_dict_del_array (dict_t *dict, char *key, uint64_t value[], int32_t size) +ec_dict_del_array(dict_t *dict, char *key, uint64_t value[], int32_t size) { int ret = 0; - ret = ec_dict_get_array (dict, key, value, size); + ret = ec_dict_get_array(dict, key, value, size); if (ret == 0) - dict_del(dict, key); + dict_del(dict, key); return ret; } - -int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value) +int32_t +ec_dict_set_number(dict_t *dict, char *key, uint64_t value) { - int ret = -1; - uint64_t * ptr; + int ret = -1; + uint64_t *ptr; ptr = GF_MALLOC(sizeof(value), gf_common_mt_char); if (ptr == NULL) { @@ -275,14 +267,15 @@ int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value) ret = dict_set_bin(dict, key, ptr, sizeof(value)); if (ret) - GF_FREE (ptr); + GF_FREE(ptr); return ret; } -int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value) +int32_t +ec_dict_del_number(dict_t *dict, char *key, uint64_t *value) { - void * ptr; + void *ptr; int32_t len, err; if (dict == NULL) { @@ -303,24 +296,23 @@ int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value) return 0; } -int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config) +int32_t +ec_dict_set_config(dict_t *dict, char *key, ec_config_t *config) { int ret = -1; - uint64_t * ptr, data; + uint64_t *ptr, data; - if (config->version > EC_CONFIG_VERSION) - { - gf_msg ("ec", GF_LOG_ERROR, EINVAL, - EC_MSG_UNSUPPORTED_VERSION, - "Trying to store an unsupported config " - "version (%u)", config->version); + if (config->version > EC_CONFIG_VERSION) { + gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION, + "Trying to store an unsupported config " + "version (%u)", + config->version); return -EINVAL; } ptr = GF_MALLOC(sizeof(uint64_t), gf_common_mt_char); - if (ptr == NULL) - { + if (ptr == NULL) { return -ENOMEM; } @@ -335,14 +327,15 @@ int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config) ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t)); if (ret) - GF_FREE (ptr); + GF_FREE(ptr); return ret; } -int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config) +int32_t +ec_dict_del_config(dict_t *dict, char *key, ec_config_t *config) { - void * ptr; + void *ptr; uint64_t data; int32_t len, err; @@ -372,12 +365,9 @@ int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config) } config->version = (data >> 56) & 0xff; - if (config->version > EC_CONFIG_VERSION) - { - gf_msg ("ec", GF_LOG_ERROR, EINVAL, - EC_MSG_UNSUPPORTED_VERSION, - "Found an unsupported config version (%u)", - config->version); + if (config->version > EC_CONFIG_VERSION) { + gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION, + "Found an unsupported config version (%u)", config->version); 
return -EINVAL; } @@ -393,7 +383,8 @@ int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config) return 0; } -gf_boolean_t ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src) +gf_boolean_t +ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src) { if (gf_uuid_is_null(src)) { return _gf_true; @@ -406,9 +397,8 @@ gf_boolean_t ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src) } if (gf_uuid_compare(dst, src) != 0) { - gf_msg (xl->name, GF_LOG_WARNING, 0, - EC_MSG_GFID_MISMATCH, - "Mismatching GFID's in loc"); + gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_GFID_MISMATCH, + "Mismatching GFID's in loc"); return _gf_false; } @@ -416,7 +406,8 @@ gf_boolean_t ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src) return _gf_true; } -int32_t ec_loc_setup_inode(xlator_t *xl, inode_table_t *table, loc_t *loc) +int32_t +ec_loc_setup_inode(xlator_t *xl, inode_table_t *table, loc_t *loc) { int32_t ret = -EINVAL; @@ -427,7 +418,7 @@ int32_t ec_loc_setup_inode(xlator_t *xl, inode_table_t *table, loc_t *loc) } else if (table != NULL) { if (!gf_uuid_is_null(loc->gfid)) { loc->inode = inode_find(table, loc->gfid); - } else if (loc->path && strchr (loc->path, '/')) { + } else if (loc->path && strchr(loc->path, '/')) { loc->inode = inode_resolve(table, (char *)loc->path); } } @@ -438,7 +429,8 @@ out: return ret; } -int32_t ec_loc_setup_parent(xlator_t *xl, inode_table_t *table, loc_t *loc) +int32_t +ec_loc_setup_parent(xlator_t *xl, inode_table_t *table, loc_t *loc) { char *path, *parent; int32_t ret = -EINVAL; @@ -450,13 +442,11 @@ int32_t ec_loc_setup_parent(xlator_t *xl, inode_table_t *table, loc_t *loc) } else if (table != NULL) { if (!gf_uuid_is_null(loc->pargfid)) { loc->parent = inode_find(table, loc->pargfid); - } else if (loc->path && strchr (loc->path, '/')) { + } else if (loc->path && strchr(loc->path, '/')) { path = gf_strdup(loc->path); if (path == NULL) { - gf_msg (xl->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Unable to duplicate path '%s'", - loc->path); + gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Unable to duplicate path '%s'", loc->path); ret = -ENOMEM; @@ -483,7 +473,8 @@ out: return ret; } -int32_t ec_loc_setup_path(xlator_t *xl, loc_t *loc) +int32_t +ec_loc_setup_path(xlator_t *xl, loc_t *loc) { uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; char *name; @@ -513,10 +504,8 @@ int32_t ec_loc_setup_path(xlator_t *xl, loc_t *loc) if (loc->name != NULL) { if (strcmp(loc->name, name) != 0) { - gf_msg (xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_INVALID_LOC_NAME, - "Invalid name '%s' in loc", - loc->name); + gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_LOC_NAME, + "Invalid name '%s' in loc", loc->name); goto out; } @@ -531,7 +520,8 @@ out: return ret; } -int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) +int32_t +ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) { inode_table_t *table = NULL; char *str = NULL; @@ -548,24 +538,20 @@ int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) if (!gf_uuid_is_null(loc->pargfid)) { gf_uuid_copy(parent->gfid, loc->pargfid); } - if (loc->path && strchr (loc->path, '/')) { + if (loc->path && strchr(loc->path, '/')) { str = gf_strdup(loc->path); if (str == NULL) { - gf_msg (xl->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Unable to duplicate path '%s'", - loc->path); + gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Unable to duplicate path '%s'", loc->path); - goto out; + goto out; } parent->path = gf_strdup(dirname(str)); if (parent->path == 
NULL) { - gf_msg (xl->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Unable to duplicate path '%s'", - dirname(str)); + gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Unable to duplicate path '%s'", dirname(str)); - goto out; + goto out; } } @@ -582,9 +568,8 @@ int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) if ((parent->inode == NULL) && (parent->path == NULL) && gf_uuid_is_null(parent->gfid)) { - gf_msg (xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_LOC_PARENT_INODE_MISSING, - "Parent inode missing for loc_t"); + gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_LOC_PARENT_INODE_MISSING, + "Parent inode missing for loc_t"); ret = -EINVAL; @@ -603,8 +588,8 @@ out: return ret; } -int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, - struct iatt *iatt) +int32_t +ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, struct iatt *iatt) { inode_table_t *table = NULL; int32_t ret = -EINVAL; @@ -645,9 +630,10 @@ out: return ret; } -int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd) +int32_t +ec_loc_from_fd(xlator_t *xl, loc_t *loc, fd_t *fd) { - ec_fd_t * ctx; + ec_fd_t *ctx; int32_t ret = -ENOMEM; memset(loc, 0, sizeof(*loc)); @@ -672,7 +658,8 @@ out: return ret; } -int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src) +int32_t +ec_loc_from_loc(xlator_t *xl, loc_t *dst, loc_t *src) { int32_t ret = -ENOMEM; @@ -695,62 +682,61 @@ out: return ret; } -void ec_owner_set(call_frame_t * frame, void * owner) +void +ec_owner_set(call_frame_t *frame, void *owner) { set_lk_owner_from_ptr(&frame->root->lk_owner, owner); } -void ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner) +void +ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner) { - lk_owner_copy (&frame->root->lk_owner, owner); + lk_owner_copy(&frame->root->lk_owner, owner); } static void -ec_stripe_cache_init (ec_t *ec, ec_inode_t *ctx) +ec_stripe_cache_init(ec_t *ec, ec_inode_t *ctx) { - ec_stripe_list_t *stripe_cache = NULL; + ec_stripe_list_t *stripe_cache = NULL; - stripe_cache = &(ctx->stripe_cache); - if (stripe_cache->max == 0) { - stripe_cache->max = ec->stripe_cache; - } + stripe_cache = &(ctx->stripe_cache); + if (stripe_cache->max == 0) { + stripe_cache->max = ec->stripe_cache; + } } -ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl) +ec_inode_t * +__ec_inode_get(inode_t *inode, xlator_t *xl) { - ec_inode_t * ctx = NULL; + ec_inode_t *ctx = NULL; uint64_t value = 0; - if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0)) - { + if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0)) { ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_inode_t); - if (ctx != NULL) - { + if (ctx != NULL) { memset(ctx, 0, sizeof(*ctx)); INIT_LIST_HEAD(&ctx->heal); INIT_LIST_HEAD(&ctx->stripe_cache.lru); value = (uint64_t)(uintptr_t)ctx; - if (__inode_ctx_set(inode, xl, &value) != 0) - { + if (__inode_ctx_set(inode, xl, &value) != 0) { GF_FREE(ctx); return NULL; } } - } - else - { + } else { ctx = (ec_inode_t *)(uintptr_t)value; } if (ctx) - ec_stripe_cache_init (xl->private, ctx); + ec_stripe_cache_init(xl->private, ctx); return ctx; } -ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl) +ec_inode_t * +ec_inode_get(inode_t *inode, xlator_t *xl) { - ec_inode_t * ctx = NULL; + ec_inode_t *ctx = NULL; LOCK(&inode->lock); @@ -761,30 +747,31 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl) return ctx; } -ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl) +ec_fd_t * +__ec_fd_get(fd_t *fd, xlator_t *xl) { int i = 0; - ec_fd_t * ctx = NULL; + ec_fd_t *ctx = NULL; 
uint64_t value = 0; ec_t *ec = xl->private; if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) { - ctx = GF_MALLOC(sizeof(*ctx) + (sizeof (ec_fd_status_t) * ec->nodes), + ctx = GF_MALLOC(sizeof(*ctx) + (sizeof(ec_fd_status_t) * ec->nodes), ec_mt_ec_fd_t); if (ctx != NULL) { memset(ctx, 0, sizeof(*ctx)); for (i = 0; i < ec->nodes; i++) { - if (fd_is_anonymous (fd)) { - ctx->fd_status[i] = EC_FD_OPENED; + if (fd_is_anonymous(fd)) { + ctx->fd_status[i] = EC_FD_OPENED; } else { - ctx->fd_status[i] = EC_FD_NOT_OPENED; + ctx->fd_status[i] = EC_FD_NOT_OPENED; } } value = (uint64_t)(uintptr_t)ctx; if (__fd_ctx_set(fd, xl, value) != 0) { - GF_FREE (ctx); + GF_FREE(ctx); return NULL; } } @@ -803,9 +790,10 @@ ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl) return ctx; } -ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl) +ec_fd_t * +ec_fd_get(fd_t *fd, xlator_t *xl) { - ec_fd_t * ctx = NULL; + ec_fd_t *ctx = NULL; LOCK(&fd->lock); @@ -817,37 +805,36 @@ ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl) } gf_boolean_t -ec_is_internal_xattr (dict_t *dict, char *key, data_t *value, void *data) +ec_is_internal_xattr(dict_t *dict, char *key, data_t *value, void *data) { - if (key && - (strncmp (key, EC_XATTR_PREFIX, SLEN (EC_XATTR_PREFIX)) == 0)) - return _gf_true; + if (key && (strncmp(key, EC_XATTR_PREFIX, SLEN(EC_XATTR_PREFIX)) == 0)) + return _gf_true; - return _gf_false; + return _gf_false; } void -ec_filter_internal_xattrs (dict_t *xattr) +ec_filter_internal_xattrs(dict_t *xattr) { - dict_foreach_match (xattr, ec_is_internal_xattr, NULL, - dict_remove_foreach_fn, NULL); + dict_foreach_match(xattr, ec_is_internal_xattr, NULL, + dict_remove_foreach_fn, NULL); } gf_boolean_t -ec_is_data_fop (glusterfs_fop_t fop) +ec_is_data_fop(glusterfs_fop_t fop) { - switch (fop) { + switch (fop) { case GF_FOP_WRITE: case GF_FOP_TRUNCATE: case GF_FOP_FTRUNCATE: case GF_FOP_FALLOCATE: case GF_FOP_DISCARD: case GF_FOP_ZEROFILL: - return _gf_true; + return _gf_true; default: - return _gf_false; - } - return _gf_false; + return _gf_false; + } + return _gf_false; } /* gf_boolean_t diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index 30d76371da6..7a8b174bbed 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -21,8 +21,9 @@ /* FOP: access */ -int32_t ec_access_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { ec_fop_data_t *fop = NULL; ec_cbk_data_t *cbk = NULL; @@ -35,27 +36,27 @@ int32_t ec_access_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); - cbk = ec_cbk_data_allocate (frame, this, fop, GF_FOP_ACCESS, - idx, op_ret, op_errno); + cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ACCESS, idx, op_ret, + op_errno); if (cbk) { if (xdata) - cbk->xdata = dict_ref (xdata); - ec_combine (cbk, NULL); + cbk->xdata = dict_ref(xdata); + ec_combine(cbk, NULL); } out: - if (fop != NULL) - { - ec_complete (fop); + if (fop != NULL) { + ec_complete(fop); } return 0; } -void ec_wind_access(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_access(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", 
idx); @@ -67,19 +68,19 @@ void ec_wind_access(ec_t * ec, ec_fop_data_t * fop, int32_t idx) int32_t ec_manager_access(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t *cbk = NULL; + ec_cbk_data_t *cbk = NULL; - switch (state) { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO, 0, - LLONG_MAX); - ec_lock (fop); + ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0, + LLONG_MAX); + ec_lock(fop); return EC_STATE_DISPATCH; case EC_STATE_DISPATCH: - ec_dispatch_one (fop); + ec_dispatch_one(fop); return EC_STATE_PREPARE_ANSWER; @@ -92,12 +93,11 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state) case EC_STATE_REPORT: cbk = fop->answer; - GF_ASSERT (cbk); + GF_ASSERT(cbk); if (fop->cbks.access != NULL) { if (cbk) { - fop->cbks.access(fop->req_frame, fop, fop->xl, - cbk->op_ret, cbk->op_errno, - cbk->xdata); + fop->cbks.access(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->xdata); } } return EC_STATE_LOCK_REUSE; @@ -108,8 +108,8 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state) case -EC_STATE_PREPARE_ANSWER: case -EC_STATE_REPORT: if (fop->cbks.access != NULL) { - fop->cbks.access(fop->req_frame, fop, fop->xl, -1, - fop->error, NULL); + fop->cbks.access(fop->req_frame, fop, fop->xl, -1, fop->error, + NULL); } return EC_STATE_LOCK_REUSE; @@ -126,24 +126,23 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; - } + } } -void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_access_cbk_t func, void * data, - loc_t * loc, int32_t mask, dict_t * xdata) +void +ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc, + int32_t mask, dict_t *xdata) { - ec_cbk_t callback = { .access = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.access = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(ACCESS) %p", frame); + gf_msg_trace("ec", 0, "EC(ACCESS) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -160,9 +159,8 @@ void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -170,10 +168,9 @@ void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -191,14 +188,13 @@ out: /* FOP: getxattr */ -int32_t ec_combine_getxattr(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_getxattr(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { - if (!ec_dict_compare(dst->dict, src->dict)) - { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_DICT_MISMATCH, 
"Mismatching dictionary in " - "answers of 'GF_FOP_GETXATTR'"); + if (!ec_dict_compare(dst->dict, src->dict)) { + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_DICT_MISMATCH, + "Mismatching dictionary in " + "answers of 'GF_FOP_GETXATTR'"); return 0; } @@ -206,12 +202,12 @@ int32_t ec_combine_getxattr(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_getxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * dict, - dict_t * xdata) +int32_t +ec_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -221,38 +217,30 @@ int32_t ec_getxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_GETXATTR, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (dict != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (dict != NULL) { cbk->dict = dict_ref(dict); - if (cbk->dict == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->dict == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -262,15 +250,15 @@ int32_t ec_getxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_getxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_getxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -280,40 +268,40 @@ void ec_wind_getxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) } void -ec_handle_special_xattrs (ec_fop_data_t *fop) +ec_handle_special_xattrs(ec_fop_data_t *fop) { - ec_cbk_data_t *cbk = NULL; - /* Stime may not be available on all the bricks, so even if some of the - * subvols succeed the operation, treat it as answer.*/ - if (fop->str[0] && - fnmatch (GF_XATTR_STIME_PATTERN, fop->str[0], 0) == 0) { - if (!fop->answer || (fop->answer->op_ret < 0)) { - list_for_each_entry (cbk, &fop->cbk_list, list) { - if (cbk->op_ret >= 0) { - fop->answer = cbk; - break; - } - } + ec_cbk_data_t *cbk = NULL; + /* Stime may not be available on all the bricks, so even if some of the + * subvols succeed the operation, treat it as answer.*/ + if (fop->str[0] && fnmatch(GF_XATTR_STIME_PATTERN, fop->str[0], 0) == 0) { + if (!fop->answer || (fop->answer->op_ret < 0)) { + list_for_each_entry(cbk, &fop->cbk_list, list) + { + if (cbk->op_ret >= 0) { + fop->answer = cbk; + break; } + } } + } } -int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_getxattr(ec_fop_data_t *fop, int32_t state) { - 
ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: /* clear-locks commands must be done without any locks acquired to avoid interferences. */ if ((fop->str[0] == NULL) || (strncmp(fop->str[0], GF_XATTR_CLRLK_CMD, - SLEN (GF_XATTR_CLRLK_CMD)) != 0)) { + SLEN(GF_XATTR_CLRLK_CMD)) != 0)) { if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, - 0, LLONG_MAX); + ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0, + LLONG_MAX); } else { ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, LLONG_MAX); @@ -333,11 +321,11 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) return EC_STATE_PREPARE_ANSWER; case EC_STATE_PREPARE_ANSWER: - ec_handle_special_xattrs (fop); + ec_handle_special_xattrs(fop); if (fop->minimum == EC_MINIMUM_ALL) { cbk = ec_fop_prepare_answer(fop, _gf_true); } else { - if (ec_dispatch_one_retry (fop, &cbk)) { + if (ec_dispatch_one_retry(fop, &cbk)) { return EC_STATE_DISPATCH; } } @@ -347,10 +335,10 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) err = ec_dict_combine(cbk, EC_COMBINE_DICT); if (!ec_cbk_set_error(cbk, -err, _gf_true)) { if (cbk->xdata != NULL) - ec_filter_internal_xattrs (cbk->xdata); + ec_filter_internal_xattrs(cbk->xdata); if (cbk->dict != NULL) - ec_filter_internal_xattrs (cbk->dict); + ec_filter_internal_xattrs(cbk->dict); } } @@ -361,8 +349,7 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.getxattr != NULL) - { + if (fop->cbks.getxattr != NULL) { fop->cbks.getxattr(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->dict, cbk->xdata); } @@ -376,10 +363,9 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.getxattr != NULL) - { - fop->cbks.getxattr(fop->req_frame, fop, fop->xl, -1, - fop->error, NULL, NULL); + if (fop->cbks.getxattr != NULL) { + fop->cbks.getxattr(fop->req_frame, fop, fop->xl, -1, fop->error, + NULL, NULL); } return EC_STATE_LOCK_REUSE; @@ -397,18 +383,17 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, - int32_t op_ret, int32_t op_errno, uintptr_t mask, - uintptr_t good, uintptr_t bad, dict_t *xdata) +int32_t +ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + int32_t op_ret, int32_t op_errno, uintptr_t mask, + uintptr_t good, uintptr_t bad, dict_t *xdata) { ec_fop_data_t *fop = cookie; fop_getxattr_cbk_t func = fop->data; @@ -460,15 +445,15 @@ out: } void -ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_getxattr_cbk_t func, void *data, - loc_t *loc, const char *name, dict_t *xdata) +ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc, + const char *name, dict_t *xdata) { - ec_cbk_t callback = { .getxattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.getxattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(GETXATTR) %p", frame); + 
gf_msg_trace("ec", 0, "EC(GETXATTR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -476,25 +461,23 @@ ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, /* Special handling of an explicit self-heal request */ if ((name != NULL) && (strcmp(name, EC_XATTR_HEAL) == 0)) { - ec_heal(frame, this, target, EC_MINIMUM_ONE, ec_getxattr_heal_cbk, - func, loc, 0, NULL); + ec_heal(frame, this, target, EC_MINIMUM_ONE, ec_getxattr_heal_cbk, func, + loc, 0, NULL); return; } - fop = ec_fop_data_allocate(frame, this, GF_FOP_GETXATTR, - EC_FLAG_LOCK_SHARED, target, minimum, - ec_wind_getxattr, ec_manager_getxattr, callback, - data); + fop = ec_fop_data_allocate( + frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, minimum, + ec_wind_getxattr, ec_manager_getxattr, callback, data); if (fop == NULL) { goto out; } if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -503,15 +486,14 @@ ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, /* In case of list-node-uuids xattr, set flag to indicate * the same and use node-uuid xattr for winding fop */ if (XATTR_IS_NODE_UUID_LIST(name)) { - fop->int32 = 1; - fop->str[0] = gf_strdup(GF_XATTR_NODE_UUID_KEY); + fop->int32 = 1; + fop->str[0] = gf_strdup(GF_XATTR_NODE_UUID_KEY); } else { - fop->str[0] = gf_strdup(name); + fop->str[0] = gf_strdup(name); } if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } @@ -519,10 +501,9 @@ ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -532,20 +513,20 @@ ec_getxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, out: if (fop != NULL) { - ec_manager (fop, error); + ec_manager(fop, error); } else { - func (frame, NULL, this, -1, error, NULL, NULL); + func(frame, NULL, this, -1, error, NULL, NULL); } } /* FOP: fgetxattr */ -int32_t ec_fgetxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * dict, - dict_t * xdata) +int32_t +ec_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -555,38 +536,30 @@ int32_t ec_fgetxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FGETXATTR, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (dict != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (dict != NULL) { 
cbk->dict = dict_ref(dict); - if (cbk->dict == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->dict == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -596,8 +569,7 @@ int32_t ec_fgetxattr_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } @@ -605,7 +577,7 @@ out: } void -ec_wind_fgetxattr (ec_t *ec, ec_fop_data_t *fop, int32_t idx) +ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -615,24 +587,23 @@ ec_wind_fgetxattr (ec_t *ec, ec_fop_data_t *fop, int32_t idx) } void -ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_fgetxattr_cbk_t func, void *data, - fd_t *fd, const char *name, dict_t *xdata) +ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, + const char *name, dict_t *xdata) { - ec_cbk_t callback = { .fgetxattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fgetxattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FGETXATTR) %p", frame); + gf_msg_trace("ec", 0, "EC(FGETXATTR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_FGETXATTR, - EC_FLAG_LOCK_SHARED, target, minimum, - ec_wind_fgetxattr, ec_manager_getxattr, - callback, data); + fop = ec_fop_data_allocate( + frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, minimum, + ec_wind_fgetxattr, ec_manager_getxattr, callback, data); if (fop == NULL) { goto out; } @@ -642,10 +613,9 @@ ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -653,8 +623,8 @@ ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (name != NULL) { fop->str[0] = gf_strdup(name); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } @@ -662,9 +632,9 @@ ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -674,22 +644,21 @@ ec_fgetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, out: if (fop != NULL) { - ec_manager (fop, error); + ec_manager(fop, 
error); } else { - func (frame, NULL, this, -1, error, NULL, NULL); + func(frame, NULL, this, -1, error, NULL, NULL); } } /* FOP: open */ -int32_t ec_combine_open(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_open(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { - if (dst->fd != src->fd) - { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_FD_MISMATCH, "Mismatching fd in answers " - "of 'GF_FOP_OPEN': %p <-> %p", + if (dst->fd != src->fd) { + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_FD_MISMATCH, + "Mismatching fd in answers " + "of 'GF_FOP_OPEN': %p <-> %p", dst->fd, src->fd); return 0; @@ -698,12 +667,12 @@ int32_t ec_combine_open(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, fd_t * fd, - dict_t * xdata) +int32_t +ec_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -713,36 +682,31 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPEN, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (fd != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (fd != NULL) { cbk->fd = fd_ref(fd); - if (cbk->fd == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + if (cbk->fd == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, + EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -750,20 +714,19 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this, ec_combine(cbk, ec_combine_open); - ec_update_fd_status (fd, this, idx, op_ret); - + ec_update_fd_status(fd, this, idx, op_ret); } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_open(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_open(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -772,10 +735,10 @@ void ec_wind_open(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->int32, fop->fd, fop->xdata); } -int32_t ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) +int32_t +ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { ec_fop_data_t *fop = cookie; int32_t error = 0; @@ -792,14 +755,14 @@ int32_t ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, 
return 0; } -int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_open(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; - ec_fd_t * ctx; + ec_cbk_data_t *cbk; + ec_fd_t *ctx; int32_t err; - switch (state) - { + switch (state) { case EC_STATE_INIT: LOCK(&fop->fd->lock); @@ -833,7 +796,7 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) fop->uint32 = fop->int32 & O_TRUNC; fop->int32 &= ~(O_APPEND | O_TRUNC); - /* Fall through */ + /* Fall through */ case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -864,8 +827,8 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) if (fop->uint32 != 0) { ec_sleep(fop); ec_ftruncate(fop->req_frame, fop->xl, cbk->mask, - fop->minimum, ec_open_truncate_cbk, - fop, cbk->fd, 0, NULL); + fop->minimum, ec_open_truncate_cbk, fop, + cbk->fd, 0, NULL); } } } @@ -877,8 +840,7 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.open != NULL) - { + if (fop->cbks.open != NULL) { fop->cbks.open(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->fd, cbk->xdata); } @@ -891,8 +853,7 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.open != NULL) - { + if (fop->cbks.open != NULL) { fop->cbks.open(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL); } @@ -900,23 +861,23 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_open_cbk_t func, void * data, loc_t * loc, - int32_t flags, fd_t * fd, dict_t * xdata) +void +ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd, + dict_t *xdata) { - ec_cbk_t callback = { .open = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.open = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(OPEN) %p", frame); + gf_msg_trace("ec", 0, "EC(OPEN) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -933,8 +894,8 @@ void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -942,9 +903,9 @@ void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -952,9 +913,9 @@ void ec_open(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, 
EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -972,13 +933,13 @@ out: /* FOP: readlink */ -int32_t ec_combine_readlink(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_readlink(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_IATT_MISMATCH, "Mismatching iatt in " - "answers of 'GF_FOP_READLINK'"); + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH, + "Mismatching iatt in " + "answers of 'GF_FOP_READLINK'"); return 0; } @@ -987,13 +948,13 @@ int32_t ec_combine_readlink(ec_fop_data_t * fop, ec_cbk_data_t * dst, } int32_t -ec_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *buf, dict_t *xdata) +ec_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) { - ec_fop_data_t *fop = NULL; - ec_cbk_data_t *cbk = NULL; - int32_t idx = (int32_t)(uintptr_t)cookie; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; + int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1002,23 +963,23 @@ ec_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); - cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, - idx, op_ret, op_errno); + cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret, + op_errno); if (cbk) { - if (xdata) - cbk->xdata = dict_ref (xdata); - - if (cbk->op_ret >= 0) { - cbk->iatt[0] = *buf; - cbk->str = gf_strdup (path); - if (!cbk->str) { - ec_cbk_set_error(cbk, ENOMEM, _gf_true); - } + if (xdata) + cbk->xdata = dict_ref(xdata); + + if (cbk->op_ret >= 0) { + cbk->iatt[0] = *buf; + cbk->str = gf_strdup(path); + if (!cbk->str) { + ec_cbk_set_error(cbk, ENOMEM, _gf_true); } - ec_combine (cbk, NULL); + } + ec_combine(cbk, NULL); } out: @@ -1028,7 +989,8 @@ out: return 0; } -void ec_wind_readlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_readlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1037,21 +999,21 @@ void ec_wind_readlink(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->size, fop->xdata); } -int32_t ec_manager_readlink(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_readlink(ec_fop_data_t *fop, int32_t state) { ec_cbk_data_t *cbk = NULL; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO, 0, - LLONG_MAX); - ec_lock (fop); + ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0, + LLONG_MAX); + ec_lock(fop); return EC_STATE_DISPATCH; case EC_STATE_DISPATCH: - ec_dispatch_one (fop); + ec_dispatch_one(fop); return EC_STATE_PREPARE_ANSWER; @@ -1068,11 +1030,11 @@ int32_t ec_manager_readlink(ec_fop_data_t * fop, int32_t state) case EC_STATE_REPORT: cbk = fop->answer; - GF_ASSERT (cbk); + GF_ASSERT(cbk); if (fop->cbks.readlink != NULL) { - fop->cbks.readlink (fop->req_frame, fop, fop->xl, cbk->op_ret, - cbk->op_errno, cbk->str, &cbk->iatt[0], - cbk->xdata); + fop->cbks.readlink(fop->req_frame, fop, fop->xl, cbk->op_ret, + 
cbk->op_errno, cbk->str, &cbk->iatt[0], + cbk->xdata); } return EC_STATE_LOCK_REUSE; @@ -1083,8 +1045,8 @@ int32_t ec_manager_readlink(ec_fop_data_t * fop, int32_t state) case -EC_STATE_PREPARE_ANSWER: case -EC_STATE_REPORT: if (fop->cbks.readlink != NULL) { - fop->cbks.readlink(fop->req_frame, fop, fop->xl, -1, - fop->error, NULL, NULL, NULL); + fop->cbks.readlink(fop->req_frame, fop, fop->xl, -1, fop->error, + NULL, NULL, NULL); } return EC_STATE_LOCK_REUSE; @@ -1100,32 +1062,31 @@ int32_t ec_manager_readlink(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_readlink_cbk_t func, void * data, - loc_t * loc, size_t size, dict_t * xdata) +void +ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc, + size_t size, dict_t *xdata) { - ec_cbk_t callback = { .readlink = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.readlink = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(READLINK) %p", frame); + gf_msg_trace("ec", 0, "EC(READLINK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_READLINK, - EC_FLAG_LOCK_SHARED, target, minimum, - ec_wind_readlink, ec_manager_readlink, callback, - data); + fop = ec_fop_data_allocate( + frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, minimum, + ec_wind_readlink, ec_manager_readlink, callback, data); if (fop == NULL) { goto out; } @@ -1134,8 +1095,8 @@ void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -1143,9 +1104,9 @@ void ec_readlink(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1163,7 +1124,8 @@ out: /* FOP: readv */ -int32_t ec_readv_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) +int32_t +ec_readv_rebuild(ec_t *ec, ec_fop_data_t *fop, ec_cbk_data_t *cbk) { struct iovec vector[1]; ec_cbk_data_t *ans = NULL; @@ -1256,22 +1218,21 @@ out: return err; } -int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_readv(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { - if (!ec_vector_compare(dst->vector, dst->int32, src->vector, src->int32)) - { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_VECTOR_MISMATCH, "Mismatching vector in " - "answers of 'GF_FOP_READ'"); + if (!ec_vector_compare(dst->vector, dst->int32, src->vector, src->int32)) { + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_VECTOR_MISMATCH, 
+ "Mismatching vector in " + "answers of 'GF_FOP_READ'"); return 0; } if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_IATT_MISMATCH, "Mismatching iatt in " - "answers of 'GF_FOP_READ'"); + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH, + "Mismatching iatt in " + "answers of 'GF_FOP_READ'"); return 0; } @@ -1279,14 +1240,14 @@ int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct iovec * vector, - int32_t count, struct iatt * stbuf, - struct iobref * iobref, dict_t * xdata) +int32_t +ec_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; - ec_t * ec = this->private; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; + ec_t *ec = this->private; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -1296,8 +1257,8 @@ int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_READ, idx, op_ret, op_errno); @@ -1308,9 +1269,9 @@ int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, if (count > 0) { cbk->vector = iov_dup(vector, count); if (cbk->vector == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to duplicate a " - "vector list."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a " + "vector list."); goto out; } @@ -1322,9 +1283,9 @@ int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, if (iobref != NULL) { cbk->buffers = iobref_ref(iobref); if (cbk->buffers == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_BUF_REF_FAIL, "Failed to reference a " - "buffer."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_BUF_REF_FAIL, + "Failed to reference a " + "buffer."); goto out; } @@ -1333,9 +1294,9 @@ int32_t ec_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, if (xdata != NULL) { cbk->xdata = dict_ref(xdata); if (cbk->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1356,7 +1317,8 @@ out: return 0; } -void ec_wind_readv(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_readv(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1365,12 +1327,12 @@ void ec_wind_readv(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->size, fop->offset, fop->uint32, fop->xdata); } -int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_readv(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: fop->user_size = fop->size; fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset, @@ -1378,7 +1340,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state) fop->size += fop->head; 
ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true); - /* Fall through */ + /* Fall through */ case EC_STATE_LOCK: ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset, @@ -1397,8 +1359,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state) if (cbk != NULL) { int32_t err; - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, - cbk->count); + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, cbk->count); err = ec_readv_rebuild(fop->xl->private, fop, cbk); if (err != 0) { @@ -1413,8 +1374,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.readv != NULL) - { + if (fop->cbks.readv != NULL) { fop->cbks.readv(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->vector, cbk->int32, &cbk->iatt[0], cbk->buffers, cbk->xdata); @@ -1429,8 +1389,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.readv != NULL) - { + if (fop->cbks.readv != NULL) { fop->cbks.readv(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, 0, NULL, NULL, NULL); } @@ -1450,31 +1409,31 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_readv_cbk_t func, void * data, fd_t * fd, - size_t size, off_t offset, uint32_t flags, dict_t * xdata) +void +ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset, + uint32_t flags, dict_t *xdata) { - ec_cbk_t callback = { .readv = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.readv = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(READ) %p", frame); + gf_msg_trace("ec", 0, "EC(READ) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED, - target, minimum, ec_wind_readv, - ec_manager_readv, callback, data); + target, minimum, ec_wind_readv, ec_manager_readv, + callback, data); if (fop == NULL) { goto out; } @@ -1488,9 +1447,9 @@ void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -1498,9 +1457,9 @@ void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1518,9 +1477,9 @@ out: /* FOP: seek */ -int32_t ec_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, off_t offset, - dict_t *xdata) +int32_t +ec_seek_cbk(call_frame_t *frame, 
void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, off_t offset, dict_t *xdata) { ec_fop_data_t *fop = NULL; ec_cbk_data_t *cbk = NULL; @@ -1534,8 +1493,8 @@ int32_t ec_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_SEEK, idx, op_ret, op_errno); @@ -1563,7 +1522,8 @@ out: return 0; } -void ec_wind_seek(ec_t *ec, ec_fop_data_t *fop, int32_t idx) +void +ec_wind_seek(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1572,126 +1532,128 @@ void ec_wind_seek(ec_t *ec, ec_fop_data_t *fop, int32_t idx) fop->offset, fop->seek, fop->xdata); } -int32_t ec_manager_seek(ec_fop_data_t *fop, int32_t state) +int32_t +ec_manager_seek(ec_fop_data_t *fop, int32_t state) { ec_cbk_data_t *cbk; size_t size; switch (state) { - case EC_STATE_INIT: - fop->user_size = fop->offset; - fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset, - _gf_true); + case EC_STATE_INIT: + fop->user_size = fop->offset; + fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset, + _gf_true); - /* Fall through */ + /* Fall through */ - case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset, LLONG_MAX); - ec_lock(fop); + case EC_STATE_LOCK: + ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset, + LLONG_MAX); + ec_lock(fop); - return EC_STATE_DISPATCH; + return EC_STATE_DISPATCH; - case EC_STATE_DISPATCH: - /* This shouldn't fail because we have the inode locked. */ - GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, - &size)); + case EC_STATE_DISPATCH: + /* This shouldn't fail because we have the inode locked. */ + GF_ASSERT( + ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, &size)); - if (fop->user_size >= size) { - ec_fop_set_error(fop, ENXIO); + if (fop->user_size >= size) { + ec_fop_set_error(fop, ENXIO); - return EC_STATE_REPORT; - } + return EC_STATE_REPORT; + } - ec_dispatch_one(fop); + ec_dispatch_one(fop); - return EC_STATE_PREPARE_ANSWER; + return EC_STATE_PREPARE_ANSWER; - case EC_STATE_PREPARE_ANSWER: - if (ec_dispatch_one_retry(fop, &cbk)) { - return EC_STATE_DISPATCH; - } - if ((cbk != NULL) && (cbk->op_ret >= 0)) { - ec_t *ec = fop->xl->private; + case EC_STATE_PREPARE_ANSWER: + if (ec_dispatch_one_retry(fop, &cbk)) { + return EC_STATE_DISPATCH; + } + if ((cbk != NULL) && (cbk->op_ret >= 0)) { + ec_t *ec = fop->xl->private; - /* This shouldn't fail because we have the inode locked. */ - GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, - &size)); + /* This shouldn't fail because we have the inode locked. 
*/ + GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, + &size)); - cbk->offset *= ec->fragments; - if (cbk->offset < fop->user_size) { - cbk->offset = fop->user_size; - } - if (cbk->offset > size) { - cbk->offset = size; + cbk->offset *= ec->fragments; + if (cbk->offset < fop->user_size) { + cbk->offset = fop->user_size; + } + if (cbk->offset > size) { + cbk->offset = size; + } } - } - return EC_STATE_REPORT; + return EC_STATE_REPORT; - case EC_STATE_REPORT: - cbk = fop->answer; + case EC_STATE_REPORT: + cbk = fop->answer; - GF_ASSERT(cbk != NULL); + GF_ASSERT(cbk != NULL); - if (fop->cbks.seek != NULL) { - fop->cbks.seek(fop->req_frame, fop, fop->xl, cbk->op_ret, - cbk->op_errno, cbk->offset, cbk->xdata); - } + if (fop->cbks.seek != NULL) { + fop->cbks.seek(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->offset, cbk->xdata); + } - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; - case -EC_STATE_INIT: - case -EC_STATE_LOCK: - case -EC_STATE_DISPATCH: - case -EC_STATE_PREPARE_ANSWER: - case -EC_STATE_REPORT: - GF_ASSERT(fop->error != 0); + case -EC_STATE_INIT: + case -EC_STATE_LOCK: + case -EC_STATE_DISPATCH: + case -EC_STATE_PREPARE_ANSWER: + case -EC_STATE_REPORT: + GF_ASSERT(fop->error != 0); - if (fop->cbks.seek != NULL) { - fop->cbks.seek(fop->req_frame, fop, fop->xl, -1, fop->error, 0, - NULL); - } + if (fop->cbks.seek != NULL) { + fop->cbks.seek(fop->req_frame, fop, fop->xl, -1, fop->error, 0, + NULL); + } - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; - case -EC_STATE_LOCK_REUSE: - case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop); + case -EC_STATE_LOCK_REUSE: + case EC_STATE_LOCK_REUSE: + ec_lock_reuse(fop); - return EC_STATE_UNLOCK; + return EC_STATE_UNLOCK; - case -EC_STATE_UNLOCK: - case EC_STATE_UNLOCK: - ec_unlock(fop); + case -EC_STATE_UNLOCK: + case EC_STATE_UNLOCK: + ec_unlock(fop); - return EC_STATE_END; + return EC_STATE_END; - default: - gf_msg (fop->xl->name, GF_LOG_ERROR, 0, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", state, - ec_fop_name(fop->id)); + default: + gf_msg(fop->xl->name, GF_LOG_ERROR, 0, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); - return EC_STATE_END; + return EC_STATE_END; } } -void ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_seek_cbk_t func, void *data, fd_t *fd, - off_t offset, gf_seek_what_t what, dict_t *xdata) +void +ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { - ec_cbk_t callback = { .seek = func }; + ec_cbk_t callback = {.seek = func}; ec_fop_data_t *fop = NULL; int32_t error = EIO; - gf_msg_trace ("ec", 0, "EC(SEEK) %p", frame); + gf_msg_trace("ec", 0, "EC(SEEK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED, - target, minimum, ec_wind_seek, - ec_manager_seek, callback, data); + target, minimum, ec_wind_seek, ec_manager_seek, + callback, data); if (fop == NULL) { goto out; } @@ -1720,13 +1682,13 @@ out: /* FOP: stat */ -int32_t ec_combine_stat(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_stat(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - 
EC_MSG_IATT_MISMATCH, "Mismatching iatt in " - "answers of 'GF_FOP_STAT'"); + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH, + "Mismatching iatt in " + "answers of 'GF_FOP_STAT'"); return 0; } @@ -1734,12 +1696,12 @@ int32_t ec_combine_stat(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_stat_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct iatt * buf, - dict_t * xdata) +int32_t +ec_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -1749,28 +1711,23 @@ int32_t ec_stat_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STAT, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (buf != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (buf != NULL) { cbk->iatt[0] = *buf; } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1780,15 +1737,15 @@ int32_t ec_stat_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_stat(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_stat(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1797,12 +1754,12 @@ void ec_wind_stat(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->xdata); } -int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_stat(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: if (fop->fd == NULL) { @@ -1825,8 +1782,7 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state) if (cbk != NULL) { if (cbk->iatt[0].ia_type == IA_IFREG) { - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, - cbk->count); + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, cbk->count); /* This shouldn't fail because we have the inode locked. 
*/ GF_ASSERT(ec_get_inode_size(fop, @@ -1842,18 +1798,13 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->id == GF_FOP_STAT) - { - if (fop->cbks.stat != NULL) - { + if (fop->id == GF_FOP_STAT) { + if (fop->cbks.stat != NULL) { fop->cbks.stat(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], cbk->xdata); } - } - else - { - if (fop->cbks.fstat != NULL) - { + } else { + if (fop->cbks.fstat != NULL) { fop->cbks.fstat(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], cbk->xdata); } @@ -1868,18 +1819,13 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->id == GF_FOP_STAT) - { - if (fop->cbks.stat != NULL) - { - fop->cbks.stat(fop->req_frame, fop, fop->xl, -1, - fop->error, NULL, NULL); + if (fop->id == GF_FOP_STAT) { + if (fop->cbks.stat != NULL) { + fop->cbks.stat(fop->req_frame, fop, fop->xl, -1, fop->error, + NULL, NULL); } - } - else - { - if (fop->cbks.fstat != NULL) - { + } else { + if (fop->cbks.fstat != NULL) { fop->cbks.fstat(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL); } @@ -1900,23 +1846,22 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_stat_cbk_t func, void * data, loc_t * loc, - dict_t * xdata) +void +ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata) { - ec_cbk_t callback = { .stat = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.stat = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(STAT) %p", frame); + gf_msg_trace("ec", 0, "EC(STAT) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1931,8 +1876,8 @@ void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -1940,9 +1885,9 @@ void ec_stat(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1960,12 +1905,12 @@ out: /* FOP: fstat */ -int32_t ec_fstat_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct iatt * buf, - dict_t * xdata) +int32_t +ec_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -1975,28 +1920,23 @@ int32_t ec_fstat_cbk(call_frame_t * 
frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSTAT, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (buf != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (buf != NULL) { cbk->iatt[0] = *buf; } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -2006,15 +1946,15 @@ int32_t ec_fstat_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_fstat(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -2023,15 +1963,15 @@ void ec_wind_fstat(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->xdata); } -void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_fstat_cbk_t func, void * data, fd_t * fd, - dict_t * xdata) +void +ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata) { - ec_cbk_t callback = { .fstat = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fstat = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FSTAT) %p", frame); + gf_msg_trace("ec", 0, "EC(FSTAT) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -2049,9 +1989,9 @@ void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -2059,9 +1999,9 @@ void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c index c9fbd3cf7df..ffdac632683 100644 --- a/xlators/cluster/ec/src/ec-inode-write.c +++ b/xlators/cluster/ec/src/ec-inode-write.c @@ -21,56 +21,55 @@ #include "ec-mem-types.h" int32_t -ec_update_writev_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) +ec_update_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - ec_fop_data_t *fop = cookie; - ec_cbk_data_t *cbk = NULL; + ec_fop_data_t *fop = cookie; + ec_cbk_data_t *cbk = NULL; ec_fop_data_t *parent = fop->parent; - int i = 0; + int i = 0; 
ec_trace("UPDATE_WRITEV_CBK", cookie, "ret=%d, errno=%d, parent-fop=%s", - op_ret, op_errno, ec_fop_name (parent->id)); + op_ret, op_errno, ec_fop_name(parent->id)); if (op_ret < 0) { - ec_fop_set_error (parent, op_errno); - goto out; + ec_fop_set_error(parent, op_errno); + goto out; } - cbk = ec_cbk_data_allocate (parent->frame, this, parent, - parent->id, 0, op_ret, op_errno); + cbk = ec_cbk_data_allocate(parent->frame, this, parent, parent->id, 0, + op_ret, op_errno); if (!cbk) { - ec_fop_set_error (parent, ENOMEM); - goto out; + ec_fop_set_error(parent, ENOMEM); + goto out; } if (xdata) - cbk->xdata = dict_ref (xdata); + cbk->xdata = dict_ref(xdata); if (prebuf) - cbk->iatt[i++] = *prebuf; + cbk->iatt[i++] = *prebuf; if (postbuf) - cbk->iatt[i++] = *postbuf; + cbk->iatt[i++] = *postbuf; - LOCK (&parent->lock); + LOCK(&parent->lock); { - parent->good &= fop->good; + parent->good &= fop->good; - if (gf_bits_count (parent->good) < parent->minimum) { - __ec_fop_set_error (parent, EIO); - } else if (fop->error == 0 && parent->answer == NULL) { - parent->answer = cbk; - } + if (gf_bits_count(parent->good) < parent->minimum) { + __ec_fop_set_error(parent, EIO); + } else if (fop->error == 0 && parent->answer == NULL) { + parent->answer = cbk; + } } - UNLOCK (&parent->lock); + UNLOCK(&parent->lock); out: return 0; } -int32_t ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, - size_t size) +int32_t +ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, size_t size) { struct iobref *iobref = NULL; struct iobuf *iobuf = NULL; @@ -94,9 +93,8 @@ int32_t ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, vector.iov_len = size; memset(vector.iov_base, 0, vector.iov_len); - ec_writev(fop->frame, fop->xl, mask, fop->minimum, - ec_update_writev_cbk, NULL, fop->fd, &vector, 1, - offset, 0, iobref, NULL); + ec_writev(fop->frame, fop->xl, mask, fop->minimum, ec_update_writev_cbk, + NULL, fop->fd, &vector, 1, offset, 0, iobref, NULL); err = 0; @@ -112,62 +110,63 @@ out: } int -ec_inode_write_cbk (call_frame_t *frame, xlator_t *this, void *cookie, - int op_ret, int op_errno, struct iatt *prestat, - struct iatt *poststat, dict_t *xdata) +ec_inode_write_cbk(call_frame_t *frame, xlator_t *this, void *cookie, + int op_ret, int op_errno, struct iatt *prestat, + struct iatt *poststat, dict_t *xdata) { - ec_fop_data_t *fop = NULL; - ec_cbk_data_t *cbk = NULL; - int i = 0; - int idx = 0; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; + int i = 0; + int idx = 0; - VALIDATE_OR_GOTO (this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, frame->local, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = frame->local; - idx = (int32_t)(uintptr_t) cookie; + fop = frame->local; + idx = (int32_t)(uintptr_t)cookie; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); - cbk = ec_cbk_data_allocate (frame, this, fop, fop->id, idx, op_ret, - op_errno); - if (!cbk) - goto out; + cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret, + op_errno); + if (!cbk) + goto out; - if (op_ret < 0) - goto out; + if (op_ret < 0) + goto out; - if (xdata) - cbk->xdata = dict_ref (xdata); + if 
(xdata) + cbk->xdata = dict_ref(xdata); - if (prestat) - cbk->iatt[i++] = *prestat; + if (prestat) + cbk->iatt[i++] = *prestat; - if (poststat) - cbk->iatt[i++] = *poststat; + if (poststat) + cbk->iatt[i++] = *poststat; out: - if (cbk) - ec_combine (cbk, ec_combine_write); + if (cbk) + ec_combine(cbk, ec_combine_write); - if (fop) - ec_complete (fop); - return 0; + if (fop) + ec_complete(fop); + return 0; } /* FOP: removexattr */ -int32_t ec_removexattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) +int32_t +ec_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - NULL, NULL, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL, + xdata); } -void ec_wind_removexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_removexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -177,127 +176,123 @@ void ec_wind_removexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) } void -ec_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - ec_fop_data_t *fop = cookie; - switch (fop->id) { + ec_fop_data_t *fop = cookie; + switch (fop->id) { case GF_FOP_SETXATTR: - if (fop->cbks.setxattr) { - fop->cbks.setxattr (frame, cookie, this, op_ret, - op_errno, xdata); - } - break; + if (fop->cbks.setxattr) { + fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno, + xdata); + } + break; case GF_FOP_REMOVEXATTR: - if (fop->cbks.removexattr) { - fop->cbks.removexattr (frame, cookie, this, op_ret, - op_errno, xdata); - } - break; + if (fop->cbks.removexattr) { + fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno, + xdata); + } + break; case GF_FOP_FSETXATTR: - if (fop->cbks.fsetxattr) { - fop->cbks.fsetxattr (frame, cookie, this, op_ret, - op_errno, xdata); - } - break; + if (fop->cbks.fsetxattr) { + fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno, + xdata); + } + break; case GF_FOP_FREMOVEXATTR: - if (fop->cbks.fremovexattr) { - fop->cbks.fremovexattr (frame, cookie, this, op_ret, - op_errno, xdata); - } - break; - } + if (fop->cbks.fremovexattr) { + fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno, + xdata); + } + break; + } } int32_t -ec_manager_xattr (ec_fop_data_t *fop, int32_t state) +ec_manager_xattr(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: - if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0], - EC_UPDATE_META | EC_QUERY_INFO, - 0, LLONG_MAX); - } else { - ec_lock_prepare_fd(fop, fop->fd, - EC_UPDATE_META | EC_QUERY_INFO, - 0, LLONG_MAX); - } - ec_lock(fop); + if (fop->fd == NULL) { + ec_lock_prepare_inode(fop, &fop->loc[0], + EC_UPDATE_META | EC_QUERY_INFO, 0, + LLONG_MAX); + } else { + ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO, + 0, LLONG_MAX); + } + ec_lock(fop); - return EC_STATE_DISPATCH; + return EC_STATE_DISPATCH; case EC_STATE_DISPATCH: - ec_dispatch_all(fop); + ec_dispatch_all(fop); - return EC_STATE_PREPARE_ANSWER; + return EC_STATE_PREPARE_ANSWER; case EC_STATE_PREPARE_ANSWER: - ec_fop_prepare_answer(fop, _gf_false); + ec_fop_prepare_answer(fop, _gf_false); - 
return EC_STATE_REPORT; + return EC_STATE_REPORT; case EC_STATE_REPORT: - cbk = fop->answer; + cbk = fop->answer; - GF_ASSERT(cbk != NULL); + GF_ASSERT(cbk != NULL); - ec_xattr_cbk (fop->req_frame, fop, fop->xl, cbk->op_ret, - cbk->op_errno, cbk->xdata); + ec_xattr_cbk(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->xdata); - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; case -EC_STATE_INIT: case -EC_STATE_LOCK: case -EC_STATE_DISPATCH: case -EC_STATE_PREPARE_ANSWER: case -EC_STATE_REPORT: - GF_ASSERT(fop->error != 0); + GF_ASSERT(fop->error != 0); - ec_xattr_cbk (fop->req_frame, fop, fop->xl, -1, fop->error, - NULL); + ec_xattr_cbk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL); - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop); + ec_lock_reuse(fop); - return EC_STATE_UNLOCK; + return EC_STATE_UNLOCK; case -EC_STATE_UNLOCK: case EC_STATE_UNLOCK: - ec_unlock(fop); + ec_unlock(fop); - return EC_STATE_END; + return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); - return EC_STATE_END; - } + return EC_STATE_END; + } } void -ec_removexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_removexattr_cbk_t func, void *data, - loc_t *loc, const char *name, dict_t *xdata) +ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_removexattr_cbk_t func, void *data, + loc_t *loc, const char *name, dict_t *xdata) { - ec_cbk_t callback = { .removexattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.removexattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(REMOVEXATTR) %p", frame); + gf_msg_trace("ec", 0, "EC(REMOVEXATTR) %p", frame); - VALIDATE_OR_GOTO (this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target, minimum, ec_wind_removexattr, ec_manager_xattr, @@ -308,9 +303,8 @@ ec_removexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -318,20 +312,18 @@ ec_removexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (name != NULL) { fop->str[0] = gf_strdup(name); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } } if (xdata != NULL) { - fop->xdata = dict_copy_with_ref (xdata, NULL); + fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -341,23 
+333,24 @@ ec_removexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, out: if (fop != NULL) { - ec_manager (fop, error); + ec_manager(fop, error); } else { - func (frame, NULL, this, -1, error, NULL); + func(frame, NULL, this, -1, error, NULL); } } /* FOP: fremovexattr */ -int32_t ec_fremovexattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) +int32_t +ec_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - NULL, NULL, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL, + xdata); } -void ec_wind_fremovexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -367,19 +360,19 @@ void ec_wind_fremovexattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) } void -ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_fremovexattr_cbk_t func, void *data, - fd_t *fd, const char *name, dict_t *xdata) +ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fremovexattr_cbk_t func, void *data, + fd_t *fd, const char *name, dict_t *xdata) { - ec_cbk_t callback = { .fremovexattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fremovexattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FREMOVEXATTR) %p", frame); + gf_msg_trace("ec", 0, "EC(FREMOVEXATTR) %p", frame); - VALIDATE_OR_GOTO (this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target, minimum, ec_wind_fremovexattr, ec_manager_xattr, @@ -393,10 +386,9 @@ ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -404,9 +396,8 @@ ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (name != NULL) { fop->str[0] = gf_strdup(name); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } @@ -414,10 +405,9 @@ ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -427,24 +417,25 @@ ec_fremovexattr (call_frame_t *frame, xlator_t *this, uintptr_t target, out: if (fop != NULL) { - ec_manager (fop, error); + ec_manager(fop, error); } else { - func (frame, NULL, this, -1, error, NULL); + func(frame, NULL, this, -1, error, NULL); } } /* FOP: setattr */ -int32_t 
ec_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prestat, struct iatt *poststat, - dict_t *xdata) +int32_t +ec_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prestat, + struct iatt *poststat, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - prestat, poststat, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat, + poststat, xdata); } -void ec_wind_setattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_setattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -453,18 +444,18 @@ void ec_wind_setattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], &fop->iatt, fop->int32, fop->xdata); } -int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_setattr(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: if (fop->fd == NULL) { ec_lock_prepare_inode(fop, &fop->loc[0], - EC_UPDATE_META | EC_QUERY_INFO, - 0, LLONG_MAX); + EC_UPDATE_META | EC_QUERY_INFO, 0, + LLONG_MAX); } else { ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO, 0, LLONG_MAX); @@ -482,8 +473,7 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state) cbk = ec_fop_prepare_answer(fop, _gf_false); if (cbk != NULL) { if (cbk->iatt[0].ia_type == IA_IFREG) { - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, - cbk->count); + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count); /* This shouldn't fail because we have the inode locked. */ GF_ASSERT(ec_get_inode_size(fop, @@ -500,20 +490,14 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->id == GF_FOP_SETATTR) - { - if (fop->cbks.setattr != NULL) - { - fop->cbks.setattr(fop->req_frame, fop, fop->xl, - cbk->op_ret, cbk->op_errno, - &cbk->iatt[0], &cbk->iatt[1], - cbk->xdata); + if (fop->id == GF_FOP_SETATTR) { + if (fop->cbks.setattr != NULL) { + fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, &cbk->iatt[0], + &cbk->iatt[1], cbk->xdata); } - } - else - { - if (fop->cbks.fsetattr != NULL) - { + } else { + if (fop->cbks.fsetattr != NULL) { fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], @@ -530,18 +514,13 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->id == GF_FOP_SETATTR) - { - if (fop->cbks.setattr != NULL) - { + if (fop->id == GF_FOP_SETATTR) { + if (fop->cbks.setattr != NULL) { fop->cbks.setattr(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } - } - else - { - if (fop->cbks.fsetattr != NULL) - { + } else { + if (fop->cbks.fsetattr != NULL) { fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } @@ -562,25 +541,23 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_setattr_cbk_t 
func, void * data, - loc_t * loc, struct iatt * stbuf, int32_t valid, - dict_t * xdata) +void +ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - ec_cbk_t callback = { .setattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.setattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(SETATTR) %p", frame); + gf_msg_trace("ec", 0, "EC(SETATTR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -597,9 +574,8 @@ void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -610,10 +586,9 @@ void ec_setattr(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -631,16 +606,17 @@ out: /* FOP: fsetattr */ -int32_t ec_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prestat, struct iatt *poststat, - dict_t *xdata) +int32_t +ec_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prestat, + struct iatt *poststat, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - prestat, poststat, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat, + poststat, xdata); } -void ec_wind_fsetattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -649,23 +625,24 @@ void ec_wind_fsetattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->fd, &fop->iatt, fop->int32, fop->xdata); } -void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_fsetattr_cbk_t func, void * data, - fd_t * fd, struct iatt * stbuf, int32_t valid, dict_t * xdata) +void +ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - ec_cbk_t callback = { .fsetattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fsetattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FSETATTR) %p", frame); + gf_msg_trace("ec", 0, "EC(FSETATTR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, - minimum, ec_wind_fsetattr, ec_manager_setattr, - callback, data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, minimum, + ec_wind_fsetattr, ec_manager_setattr, callback, + data); if (fop == NULL) { goto out; } @@ -677,10 +654,9 @@ void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == 
NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -691,10 +667,9 @@ void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -712,14 +687,16 @@ out: /* FOP: setxattr */ -int32_t ec_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +int32_t +ec_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - NULL, NULL, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL, + xdata); } -void ec_wind_setxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -729,23 +706,23 @@ void ec_wind_setxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) } void -ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_setxattr_cbk_t func, void *data, - loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) +ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) { - ec_cbk_t callback = { .setxattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.setxattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(SETXATTR) %p", frame); + gf_msg_trace("ec", 0, "EC(SETXATTR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, - minimum, ec_wind_setxattr, ec_manager_xattr, - callback, data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, minimum, + ec_wind_setxattr, ec_manager_xattr, callback, + data); if (fop == NULL) { goto out; } @@ -754,9 +731,8 @@ ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -764,10 +740,9 @@ ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (dict != NULL) { fop->dict = dict_copy_with_ref(dict, NULL); if (fop->dict == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -775,10 +750,9 @@ ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + 
gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -788,45 +762,41 @@ ec_setxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, out: if (fop != NULL) { - ec_manager (fop, error); + ec_manager(fop, error); } else { - func (frame, NULL, this, -1, error, NULL); + func(frame, NULL, this, -1, error, NULL); } } /* FOP: fsetxattr */ int32_t -ec_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +ec_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; - VALIDATE_OR_GOTO (this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, frame->local, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSETXATTR, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -836,15 +806,15 @@ ec_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_fsetxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -854,15 +824,15 @@ void ec_wind_fsetxattr(ec_t * ec, ec_fop_data_t * fop, int32_t idx) } void -ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_fsetxattr_cbk_t func, void *data, - fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata) +ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata) { - ec_cbk_t callback = { .fsetxattr = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fsetxattr = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FSETXATTR) %p", frame); + gf_msg_trace("ec", 0, "EC(FSETXATTR) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -882,10 +852,9 @@ ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -893,10 +862,9 @@ ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t 
target, if (dict != NULL) { fop->dict = dict_copy_with_ref(dict, NULL); if (fop->dict == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -904,10 +872,9 @@ ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -917,9 +884,9 @@ ec_fsetxattr (call_frame_t *frame, xlator_t *this, uintptr_t target, out: if (fop != NULL) { - ec_manager (fop, error); + ec_manager(fop, error); } else { - func (frame, NULL, this, -1, error, NULL); + func(frame, NULL, this, -1, error, NULL); } } @@ -929,154 +896,153 @@ out: * *********************************************************************/ -int32_t ec_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +int32_t +ec_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - prebuf, postbuf, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf, + postbuf, xdata); } -void ec_wind_fallocate(ec_t *ec, ec_fop_data_t *fop, int32_t idx) +void +ec_wind_fallocate(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); STACK_WIND_COOKIE(fop->frame, ec_fallocate_cbk, (void *)(uintptr_t)idx, ec->xl_list[idx], ec->xl_list[idx]->fops->fallocate, - fop->fd, fop->int32, fop->offset, - fop->size, fop->xdata); + fop->fd, fop->int32, fop->offset, fop->size, fop->xdata); } -int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state) +int32_t +ec_manager_fallocate(ec_fop_data_t *fop, int32_t state) { ec_cbk_data_t *cbk = NULL; switch (state) { - case EC_STATE_INIT: - if (fop->size == 0) { + case EC_STATE_INIT: + if (fop->size == 0) { ec_fop_set_error(fop, EINVAL); return EC_STATE_REPORT; - } - if (fop->int32 & (FALLOC_FL_COLLAPSE_RANGE - |FALLOC_FL_INSERT_RANGE - |FALLOC_FL_ZERO_RANGE - |FALLOC_FL_PUNCH_HOLE)) { + } + if (fop->int32 & + (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_PUNCH_HOLE)) { ec_fop_set_error(fop, ENOTSUP); return EC_STATE_REPORT; - } - fop->user_size = fop->offset + fop->size; - fop->head = ec_adjust_offset_down (fop->xl->private, &fop->offset, - _gf_true); - fop->size += fop->head; - ec_adjust_size_up (fop->xl->private, &fop->size, _gf_true); + } + fop->user_size = fop->offset + fop->size; + fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset, + _gf_true); + fop->size += fop->head; + ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true); - /* Fall through */ + /* Fall through */ - case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd, - EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, - fop->offset, fop->size); - ec_lock(fop); + case EC_STATE_LOCK: + ec_lock_prepare_fd(fop, fop->fd, + EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, + fop->offset, fop->size); + ec_lock(fop); - return EC_STATE_DISPATCH; + return EC_STATE_DISPATCH; - case 
EC_STATE_DISPATCH: + case EC_STATE_DISPATCH: - ec_dispatch_all(fop); + ec_dispatch_all(fop); - return EC_STATE_PREPARE_ANSWER; + return EC_STATE_PREPARE_ANSWER; - case EC_STATE_PREPARE_ANSWER: - cbk = ec_fop_prepare_answer(fop, _gf_false); - if (cbk != NULL) { - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, - cbk->count); + case EC_STATE_PREPARE_ANSWER: + cbk = ec_fop_prepare_answer(fop, _gf_false); + if (cbk != NULL) { + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count); /* This shouldn't fail because we have the inode locked. */ LOCK(&fop->locks[0].lock->loc.inode->lock); { - GF_ASSERT(__ec_get_inode_size(fop, - fop->locks[0].lock->loc.inode, - &cbk->iatt[0].ia_size)); + GF_ASSERT(__ec_get_inode_size(fop, + fop->locks[0].lock->loc.inode, + &cbk->iatt[0].ia_size)); - /*If mode has FALLOC_FL_KEEP_SIZE keep the size */ - if (fop->int32 & FALLOC_FL_KEEP_SIZE) { - cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; - } else if (fop->user_size > cbk->iatt[0].ia_size) { - cbk->iatt[1].ia_size = fop->user_size; - - /* This shouldn't fail because we have the inode - * locked. */ - GF_ASSERT(__ec_set_inode_size(fop, - fop->locks[0].lock->loc.inode, - cbk->iatt[1].ia_size)); - } else { - cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; - } + /*If mode has FALLOC_FL_KEEP_SIZE keep the size */ + if (fop->int32 & FALLOC_FL_KEEP_SIZE) { + cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; + } else if (fop->user_size > cbk->iatt[0].ia_size) { + cbk->iatt[1].ia_size = fop->user_size; + + /* This shouldn't fail because we have the inode + * locked. */ + GF_ASSERT(__ec_set_inode_size( + fop, fop->locks[0].lock->loc.inode, + cbk->iatt[1].ia_size)); + } else { + cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; + } } UNLOCK(&fop->locks[0].lock->loc.inode->lock); - } + } - return EC_STATE_REPORT; + return EC_STATE_REPORT; - case EC_STATE_REPORT: - cbk = fop->answer; + case EC_STATE_REPORT: + cbk = fop->answer; - GF_ASSERT(cbk != NULL); + GF_ASSERT(cbk != NULL); - if (fop->cbks.fallocate != NULL) { + if (fop->cbks.fallocate != NULL) { fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); - } + } - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; - case -EC_STATE_INIT: - case -EC_STATE_LOCK: - case -EC_STATE_DISPATCH: - case -EC_STATE_PREPARE_ANSWER: - case -EC_STATE_REPORT: - GF_ASSERT(fop->error != 0); + case -EC_STATE_INIT: + case -EC_STATE_LOCK: + case -EC_STATE_DISPATCH: + case -EC_STATE_PREPARE_ANSWER: + case -EC_STATE_REPORT: + GF_ASSERT(fop->error != 0); - if (fop->cbks.fallocate != NULL) { + if (fop->cbks.fallocate != NULL) { fop->cbks.fallocate(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); - } + } - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; - case -EC_STATE_LOCK_REUSE: - case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop); + case -EC_STATE_LOCK_REUSE: + case EC_STATE_LOCK_REUSE: + ec_lock_reuse(fop); - return EC_STATE_UNLOCK; + return EC_STATE_UNLOCK; - case -EC_STATE_UNLOCK: - case EC_STATE_UNLOCK: - ec_unlock(fop); + case -EC_STATE_UNLOCK: + case EC_STATE_UNLOCK: + ec_unlock(fop); - return EC_STATE_END; + return EC_STATE_END; - default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + default: + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); - return EC_STATE_END; + return EC_STATE_END; } } -void ec_fallocate(call_frame_t *frame, 
xlator_t *this, uintptr_t target, - int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, - int32_t mode, off_t offset, size_t len, dict_t *xdata) +void +ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata) { - ec_cbk_t callback = { .fallocate = func }; + ec_cbk_t callback = {.fallocate = func}; ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FALLOCATE) %p", frame); + gf_msg_trace("ec", 0, "EC(FALLOCATE) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1097,22 +1063,20 @@ void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); + goto out; } } if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); + goto out; } } @@ -1131,66 +1095,70 @@ out: * File Operation : Discard * *********************************************************************/ -void ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask) +void +ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask) { - ec_t *ec = fop->xl->private; - off_t off_head = 0; - off_t off_tail = 0; + ec_t *ec = fop->xl->private; + off_t off_head = 0; + off_t off_tail = 0; size_t size_head = 0; size_t size_tail = 0; - int error = 0; + int error = 0; off_head = fop->offset * ec->fragments - fop->int32; if (fop->size == 0) { - error = ec_update_write (fop, mask, off_head, fop->user_size); + error = ec_update_write(fop, mask, off_head, fop->user_size); } else { - size_head = fop->int32; - size_tail = (off_head + fop->user_size) % ec->stripe_size; - off_tail = off_head + fop->user_size - size_tail; - if (size_head) { - error = ec_update_write (fop, mask, off_head, size_head); - if (error) { - goto out; - } - } - if (size_tail) { - error = ec_update_write (fop, mask, off_tail, size_tail); + size_head = fop->int32; + size_tail = (off_head + fop->user_size) % ec->stripe_size; + off_tail = off_head + fop->user_size - size_tail; + if (size_head) { + error = ec_update_write(fop, mask, off_head, size_head); + if (error) { + goto out; } + } + if (size_tail) { + error = ec_update_write(fop, mask, off_tail, size_tail); + } } out: if (error) - ec_fop_set_error (fop, -error); + ec_fop_set_error(fop, -error); } -void ec_discard_adjust_offset_size(ec_fop_data_t *fop) +void +ec_discard_adjust_offset_size(ec_fop_data_t *fop) { - ec_t *ec = fop->xl->private; + ec_t *ec = fop->xl->private; - fop->user_size = fop->size; - /* If discard length covers at least a fragment on brick, we will - * perform discard operation(when fop->size is non-zero) else we just - * write zeros. 
- */ - fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true); - fop->frag_range.first = fop->offset; - if (fop->size < fop->int32) { - fop->size = 0; - } else { - fop->size -= fop->int32; - ec_adjust_size_down(ec, &fop->size, _gf_true); - } - fop->frag_range.last = fop->offset + fop->size; + fop->user_size = fop->size; + /* If discard length covers at least a fragment on brick, we will + * perform discard operation(when fop->size is non-zero) else we just + * write zeros. + */ + fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true); + fop->frag_range.first = fop->offset; + if (fop->size < fop->int32) { + fop->size = 0; + } else { + fop->size -= fop->int32; + ec_adjust_size_down(ec, &fop->size, _gf_true); + } + fop->frag_range.last = fop->offset + fop->size; } -int32_t ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +int32_t +ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - prebuf, postbuf, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf, + postbuf, xdata); } -void ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx) +void +ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1199,146 +1167,144 @@ void ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx) fop->fd, fop->offset, fop->size, fop->xdata); } -int32_t ec_manager_discard(ec_fop_data_t *fop, int32_t state) +int32_t +ec_manager_discard(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t *cbk = NULL; - off_t fl_start = 0; - size_t fl_size = 0; - + ec_cbk_data_t *cbk = NULL; + off_t fl_start = 0; + size_t fl_size = 0; switch (state) { - case EC_STATE_INIT: - if ((fop->size <= 0) || (fop->offset < 0)) { + case EC_STATE_INIT: + if ((fop->size <= 0) || (fop->offset < 0)) { ec_fop_set_error(fop, EINVAL); return EC_STATE_REPORT; - } - /* Because of the head/tail writes, "discard" happens on the remaining - * regions, but we need to compute region including head/tail writes - * so compute them separately*/ - fl_start = fop->offset; - fl_size = fop->size; - fl_size += ec_adjust_offset_down (fop->xl->private, &fl_start, - _gf_true); - ec_adjust_size_up (fop->xl->private, &fl_size, _gf_true); + } + /* Because of the head/tail writes, "discard" happens on the + * remaining regions, but we need to compute region including + * head/tail writes so compute them separately*/ + fl_start = fop->offset; + fl_size = fop->size; + fl_size += ec_adjust_offset_down(fop->xl->private, &fl_start, + _gf_true); + ec_adjust_size_up(fop->xl->private, &fl_size, _gf_true); - ec_discard_adjust_offset_size(fop); + ec_discard_adjust_offset_size(fop); - /* Fall through */ + /* Fall through */ - case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd, - EC_UPDATE_DATA | EC_UPDATE_META | - EC_QUERY_INFO, fl_start, fl_size); - ec_lock(fop); + case EC_STATE_LOCK: + ec_lock_prepare_fd(fop, fop->fd, + EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, + fl_start, fl_size); + ec_lock(fop); - return EC_STATE_DISPATCH; + return EC_STATE_DISPATCH; - case EC_STATE_DISPATCH: + case EC_STATE_DISPATCH: - /* Dispatch discard fop only if we have whole fragment - * to deallocate */ - if (fop->size) { + /* Dispatch discard fop only if we have whole fragment + * to deallocate */ + if 
(fop->size) { ec_dispatch_all(fop); return EC_STATE_DELAYED_START; - } else { + } else { /*Assume discard to have succeeded on mask*/ fop->good = fop->mask; - } + } - /* Fall through */ + /* Fall through */ - case EC_STATE_DELAYED_START: + case EC_STATE_DELAYED_START: - if (fop->size) { + if (fop->size) { if (fop->answer && fop->answer->op_ret == 0) - ec_update_discard_write (fop, fop->answer->mask); - } else { - ec_update_discard_write (fop, fop->mask); - } + ec_update_discard_write(fop, fop->answer->mask); + } else { + ec_update_discard_write(fop, fop->mask); + } - return EC_STATE_PREPARE_ANSWER; + return EC_STATE_PREPARE_ANSWER; - case EC_STATE_PREPARE_ANSWER: - cbk = ec_fop_prepare_answer(fop, _gf_false); - if (cbk != NULL) { - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, - cbk->count); + case EC_STATE_PREPARE_ANSWER: + cbk = ec_fop_prepare_answer(fop, _gf_false); + if (cbk != NULL) { + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count); /* This shouldn't fail because we have the inode locked. */ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, &cbk->iatt[0].ia_size)); cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; - } - return EC_STATE_REPORT; + } + return EC_STATE_REPORT; - case EC_STATE_REPORT: - cbk = fop->answer; + case EC_STATE_REPORT: + cbk = fop->answer; - GF_ASSERT(cbk != NULL); + GF_ASSERT(cbk != NULL); - if (fop->cbks.discard != NULL) { + if (fop->cbks.discard != NULL) { fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); - } + } - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; - case -EC_STATE_INIT: - case -EC_STATE_LOCK: - case -EC_STATE_DISPATCH: - case -EC_STATE_DELAYED_START: - case -EC_STATE_PREPARE_ANSWER: - case -EC_STATE_REPORT: - GF_ASSERT(fop->error != 0); + case -EC_STATE_INIT: + case -EC_STATE_LOCK: + case -EC_STATE_DISPATCH: + case -EC_STATE_DELAYED_START: + case -EC_STATE_PREPARE_ANSWER: + case -EC_STATE_REPORT: + GF_ASSERT(fop->error != 0); - if (fop->cbks.discard != NULL) { - fop->cbks.discard(fop->req_frame, fop, fop->xl, -1, - fop->error, NULL, NULL, NULL); - } + if (fop->cbks.discard != NULL) { + fop->cbks.discard(fop->req_frame, fop, fop->xl, -1, fop->error, + NULL, NULL, NULL); + } - return EC_STATE_LOCK_REUSE; + return EC_STATE_LOCK_REUSE; - case -EC_STATE_LOCK_REUSE: - case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop); + case -EC_STATE_LOCK_REUSE: + case EC_STATE_LOCK_REUSE: + ec_lock_reuse(fop); - return EC_STATE_UNLOCK; + return EC_STATE_UNLOCK; - case -EC_STATE_UNLOCK: - case EC_STATE_UNLOCK: - ec_unlock(fop); + case -EC_STATE_UNLOCK: + case EC_STATE_UNLOCK: + ec_unlock(fop); - return EC_STATE_END; + return EC_STATE_END; - default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + default: + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); - return EC_STATE_END; + return EC_STATE_END; } } -void ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, - int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd, - off_t offset, size_t len, dict_t *xdata) +void +ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd, + off_t offset, size_t len, dict_t *xdata) { - ec_cbk_t callback = { .discard = func }; + ec_cbk_t callback = {.discard = func}; ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - 
gf_msg_trace ("ec", 0, "EC(DISCARD) %p", frame); + gf_msg_trace("ec", 0, "EC(DISCARD) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, - minimum, ec_wind_discard, ec_manager_discard, - callback, data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, minimum, + ec_wind_discard, ec_manager_discard, callback, + data); if (fop == NULL) { goto out; } @@ -1371,33 +1337,35 @@ out: * *********************************************************************/ -int32_t ec_update_truncate_write (ec_fop_data_t *fop, uintptr_t mask) +int32_t +ec_update_truncate_write(ec_fop_data_t *fop, uintptr_t mask) { - ec_t *ec = fop->xl->private; - size_t size = fop->offset * ec->fragments - fop->user_size; - return ec_update_write (fop, mask, fop->user_size, size); + ec_t *ec = fop->xl->private; + size_t size = fop->offset * ec->fragments - fop->user_size; + return ec_update_write(fop, mask, fop->user_size, size); } -int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie, - xlator_t * this, int32_t op_ret, int32_t op_errno, - fd_t * fd, dict_t * xdata) +int32_t +ec_truncate_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - ec_fop_data_t * fop = cookie; + ec_fop_data_t *fop = cookie; int32_t err; fop->parent->good &= fop->good; if (op_ret >= 0) { - fd_bind (fd); - err = ec_update_truncate_write (fop->parent, fop->answer->mask); + fd_bind(fd); + err = ec_update_truncate_write(fop->parent, fop->answer->mask); if (err != 0) { - ec_fop_set_error (fop->parent, -err); + ec_fop_set_error(fop->parent, -err); } } return 0; } -int32_t ec_truncate_clean(ec_fop_data_t * fop) +int32_t +ec_truncate_clean(ec_fop_data_t *fop) { if (fop->fd == NULL) { fop->fd = fd_create(fop->loc[0].inode, fop->frame->root->pid); @@ -1406,24 +1374,25 @@ int32_t ec_truncate_clean(ec_fop_data_t * fop) } ec_open(fop->frame, fop->xl, fop->answer->mask, fop->minimum, - ec_truncate_open_cbk, fop, &fop->loc[0], O_RDWR, fop->fd, - NULL); + ec_truncate_open_cbk, fop, &fop->loc[0], O_RDWR, fop->fd, NULL); return 0; } else { - return ec_update_truncate_write (fop, fop->answer->mask); + return ec_update_truncate_write(fop, fop->answer->mask); } } -int32_t ec_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prestat, - struct iatt *poststat, dict_t *xdata) +int32_t +ec_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prestat, + struct iatt *poststat, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - prestat, poststat, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat, + poststat, xdata); } -void ec_wind_truncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_truncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1432,29 +1401,31 @@ void ec_wind_truncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx) &fop->loc[0], fop->offset, fop->xdata); } -int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_truncate(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: fop->user_size = fop->offset; ec_adjust_offset_up(fop->xl->private, &fop->offset, 
_gf_true); fop->frag_range.first = fop->offset; fop->frag_range.last = UINT64_MAX; - /* Fall through */ + /* Fall through */ case EC_STATE_LOCK: if (fop->id == GF_FOP_TRUNCATE) { - ec_lock_prepare_inode(fop, &fop->loc[0], - EC_UPDATE_DATA | EC_UPDATE_META | - EC_QUERY_INFO, fop->offset, LLONG_MAX); + ec_lock_prepare_inode( + fop, &fop->loc[0], + EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, + fop->offset, LLONG_MAX); } else { - ec_lock_prepare_fd(fop, fop->fd, - EC_UPDATE_DATA | EC_UPDATE_META | - EC_QUERY_INFO, fop->offset, LLONG_MAX); + ec_lock_prepare_fd( + fop, fop->fd, + EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, + fop->offset, LLONG_MAX); } ec_lock(fop); @@ -1470,8 +1441,7 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) if (cbk != NULL) { int32_t err; - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, - cbk->count); + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count); /* This shouldn't fail because we have the inode locked. */ /* Inode size doesn't need to be updated under locks, because @@ -1499,20 +1469,15 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->id == GF_FOP_TRUNCATE) - { - if (fop->cbks.truncate != NULL) - { + if (fop->id == GF_FOP_TRUNCATE) { + if (fop->cbks.truncate != NULL) { fop->cbks.truncate(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); } - } - else - { - if (fop->cbks.ftruncate != NULL) - { + } else { + if (fop->cbks.ftruncate != NULL) { fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], @@ -1529,18 +1494,13 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->id == GF_FOP_TRUNCATE) - { - if (fop->cbks.truncate != NULL) - { + if (fop->id == GF_FOP_TRUNCATE) { + if (fop->cbks.truncate != NULL) { fop->cbks.truncate(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } - } - else - { - if (fop->cbks.ftruncate != NULL) - { + } else { + if (fop->cbks.ftruncate != NULL) { fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } @@ -1561,32 +1521,31 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_truncate_cbk_t func, void * data, - loc_t * loc, off_t offset, dict_t * xdata) +void +ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc, + off_t offset, dict_t *xdata) { - ec_cbk_t callback = { .truncate = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.truncate = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(TRUNCATE) %p", frame); + gf_msg_trace("ec", 0, "EC(TRUNCATE) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, - minimum, ec_wind_truncate, ec_manager_truncate, - callback, data); + fop = ec_fop_data_allocate(frame, this, 
GF_FOP_TRUNCATE, 0, target, minimum, + ec_wind_truncate, ec_manager_truncate, callback, + data); if (fop == NULL) { goto out; } @@ -1595,9 +1554,8 @@ void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target, if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -1605,10 +1563,9 @@ void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1626,16 +1583,17 @@ out: /* FOP: ftruncate */ -int32_t ec_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prestat, struct iatt *poststat, - dict_t *xdata) +int32_t +ec_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prestat, + struct iatt *poststat, dict_t *xdata) { - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - prestat, poststat, xdata); + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat, + poststat, xdata); } -void ec_wind_ftruncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1644,15 +1602,16 @@ void ec_wind_ftruncate(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->fd, fop->offset, fop->xdata); } -void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_ftruncate_cbk_t func, void * data, - fd_t * fd, off_t offset, dict_t * xdata) +void +ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd, + off_t offset, dict_t *xdata) { - ec_cbk_t callback = { .ftruncate = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.ftruncate = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FTRUNCATE) %p", frame); + gf_msg_trace("ec", 0, "EC(FTRUNCATE) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1672,10 +1631,9 @@ void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -1683,10 +1641,9 @@ void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -1704,95 +1661,91 @@ out: /* FOP: writev */ static ec_stripe_t * -ec_allocate_stripe (ec_t *ec, ec_stripe_list_t *stripe_cache) +ec_allocate_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache) { - ec_stripe_t 
*stripe = NULL; + ec_stripe_t *stripe = NULL; - if (stripe_cache->count >= stripe_cache->max) { - GF_ASSERT (!list_empty(&stripe_cache->lru)); - stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru); - list_move_tail(&stripe->lru, &stripe_cache->lru); - GF_ATOMIC_INC(ec->stats.stripe_cache.evicts); + if (stripe_cache->count >= stripe_cache->max) { + GF_ASSERT(!list_empty(&stripe_cache->lru)); + stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru); + list_move_tail(&stripe->lru, &stripe_cache->lru); + GF_ATOMIC_INC(ec->stats.stripe_cache.evicts); + } else { + stripe = GF_MALLOC(sizeof(ec_stripe_t) + ec->stripe_size, + ec_mt_ec_stripe_t); + if (stripe != NULL) { + stripe_cache->count++; + list_add_tail(&stripe->lru, &stripe_cache->lru); + GF_ATOMIC_INC(ec->stats.stripe_cache.allocs); } else { - stripe = GF_MALLOC (sizeof (ec_stripe_t) + ec->stripe_size, - ec_mt_ec_stripe_t); - if (stripe != NULL) { - stripe_cache->count++; - list_add_tail (&stripe->lru, &stripe_cache->lru); - GF_ATOMIC_INC(ec->stats.stripe_cache.allocs); - } else { - GF_ATOMIC_INC(ec->stats.stripe_cache.errors); - } + GF_ATOMIC_INC(ec->stats.stripe_cache.errors); } + } - return stripe; + return stripe; } static void -ec_write_stripe_data (ec_t *ec, ec_fop_data_t *fop, - ec_stripe_t *stripe) +ec_write_stripe_data(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe) { - off_t base; + off_t base; - base = fop->size - ec->stripe_size; - memcpy(stripe->data, fop->vector[0].iov_base + base, ec->stripe_size); - stripe->frag_offset = fop->frag_range.last - ec->fragment_size; + base = fop->size - ec->stripe_size; + memcpy(stripe->data, fop->vector[0].iov_base + base, ec->stripe_size); + stripe->frag_offset = fop->frag_range.last - ec->fragment_size; } static void -ec_add_stripe_in_cache (ec_t *ec, ec_fop_data_t *fop) +ec_add_stripe_in_cache(ec_t *ec, ec_fop_data_t *fop) { - ec_inode_t *ctx = NULL; - ec_stripe_t *stripe = NULL; - ec_stripe_list_t *stripe_cache = NULL; - gf_boolean_t failed = _gf_true; - - LOCK(&fop->fd->inode->lock); + ec_inode_t *ctx = NULL; + ec_stripe_t *stripe = NULL; + ec_stripe_list_t *stripe_cache = NULL; + gf_boolean_t failed = _gf_true; - ctx = __ec_inode_get (fop->fd->inode, fop->xl); - if (ctx == NULL) { - goto out; - } + LOCK(&fop->fd->inode->lock); - stripe_cache = &ctx->stripe_cache; - if (stripe_cache->max > 0) { - stripe = ec_allocate_stripe (ec, stripe_cache); - if (stripe == NULL) { - goto out; - } + ctx = __ec_inode_get(fop->fd->inode, fop->xl); + if (ctx == NULL) { + goto out; + } - ec_write_stripe_data (ec, fop, stripe); + stripe_cache = &ctx->stripe_cache; + if (stripe_cache->max > 0) { + stripe = ec_allocate_stripe(ec, stripe_cache); + if (stripe == NULL) { + goto out; } - failed = _gf_false; + ec_write_stripe_data(ec, fop, stripe); + } + + failed = _gf_false; out: - UNLOCK(&fop->fd->inode->lock); + UNLOCK(&fop->fd->inode->lock); - if (failed) { - gf_msg (ec->xl->name, GF_LOG_DEBUG, ENOMEM, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to create and add stripe in cache"); - } + if (failed) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, ENOMEM, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to create and add stripe in cache"); + } } -int32_t ec_writev_merge_tail(call_frame_t * frame, void * cookie, - xlator_t * this, int32_t op_ret, int32_t op_errno, - struct iovec * vector, int32_t count, - struct iatt * stbuf, struct iobref * iobref, - dict_t * xdata) +int32_t +ec_writev_merge_tail(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t 
count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { - ec_t * ec = this->private; - ec_fop_data_t * fop = frame->local; + ec_t *ec = this->private; + ec_fop_data_t *fop = frame->local; size_t size, base, tmp; - if (op_ret >= 0) - { + if (op_ret >= 0) { tmp = 0; size = fop->size - fop->user_size - fop->head; base = ec->stripe_size - size; - if (op_ret > base) - { + if (op_ret > base) { tmp = min(op_ret - base, size); ec_iov_copy_to(fop->vector[0].iov_base + fop->size - size, vector, count, base, tmp); @@ -1800,49 +1753,44 @@ int32_t ec_writev_merge_tail(call_frame_t * frame, void * cookie, size -= tmp; } - if (size > 0) - { + if (size > 0) { memset(fop->vector[0].iov_base + fop->size - size, 0, size); } if (ec->stripe_cache) { - ec_add_stripe_in_cache (ec, fop); + ec_add_stripe_in_cache(ec, fop); } } return 0; } -int32_t ec_writev_merge_head(call_frame_t * frame, void * cookie, - xlator_t * this, int32_t op_ret, int32_t op_errno, - struct iovec * vector, int32_t count, - struct iatt * stbuf, struct iobref * iobref, - dict_t * xdata) +int32_t +ec_writev_merge_head(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { - ec_t * ec = this->private; - ec_fop_data_t * fop = frame->local; + ec_t *ec = this->private; + ec_fop_data_t *fop = frame->local; size_t size, base; - if (op_ret >= 0) - { + if (op_ret >= 0) { size = fop->head; base = 0; - if (op_ret > 0) - { + if (op_ret > 0) { base = min(op_ret, size); ec_iov_copy_to(fop->vector[0].iov_base, vector, count, 0, base); size -= base; } - if (size > 0) - { + if (size > 0) { memset(fop->vector[0].iov_base + base, 0, size); } size = fop->size - fop->user_size - fop->head; - if ((size > 0) && (fop->size == ec->stripe_size)) - { + if ((size > 0) && (fop->size == ec->stripe_size)) { ec_writev_merge_tail(frame, cookie, this, op_ret, op_errno, vector, count, stbuf, iobref, xdata); } @@ -1852,7 +1800,7 @@ int32_t ec_writev_merge_head(call_frame_t * frame, void * cookie, } static int -ec_make_internal_fop_xdata (dict_t **xdata) +ec_make_internal_fop_xdata(dict_t **xdata) { dict_t *dict = NULL; @@ -1861,16 +1809,16 @@ ec_make_internal_fop_xdata (dict_t **xdata) dict = dict_new(); if (!dict) - goto out; + goto out; - if (dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes")) - goto out; + if (dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes")) + goto out; *xdata = dict; return 0; out: if (dict) - dict_unref (dict); + dict_unref(dict); return -1; } @@ -1889,8 +1837,7 @@ ec_writev_prepare_buffers(ec_t *ec, ec_fop_data_t *fop) ec_adjust_size_up(ec, &fop->size, _gf_false); fop->frag_range.last = fop->frag_range.first + fop->size / ec->fragments; - if ((fop->int32 != 1) || (fop->head != 0) || - (fop->size > fop->user_size) || + if ((fop->int32 != 1) || (fop->head != 0) || (fop->size > fop->user_size) || !EC_ALIGN_CHECK(fop->vector[0].iov_base, EC_METHOD_WORD_SIZE)) { err = ec_buffer_alloc(ec->xl, fop->size, &iobref, &ptr); if (err != 0) { @@ -1935,98 +1882,99 @@ out: } static void -ec_merge_stripe_head_locked (ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe) +ec_merge_stripe_head_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe) { - size_t head, size; + size_t head, size; - head = fop->head; - memcpy(fop->vector[0].iov_base, stripe->data, head); + head = fop->head; + memcpy(fop->vector[0].iov_base, stripe->data, head); + size = ec->stripe_size - head; + if (size > fop->user_size) { + head += 
fop->user_size; size = ec->stripe_size - head; - if (size > fop->user_size) { - head += fop->user_size; - size = ec->stripe_size - head; - memcpy(fop->vector[0].iov_base + head, stripe->data + head, - size); - } + memcpy(fop->vector[0].iov_base + head, stripe->data + head, size); + } } static void -ec_merge_stripe_tail_locked (ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe) +ec_merge_stripe_tail_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe) { - size_t head, tail; - off_t offset; + size_t head, tail; + off_t offset; - offset = fop->user_size + fop->head; - tail = fop->size - offset; - head = ec->stripe_size - tail; + offset = fop->user_size + fop->head; + tail = fop->size - offset; + head = ec->stripe_size - tail; - memcpy(fop->vector[0].iov_base + offset, stripe->data + head, tail); + memcpy(fop->vector[0].iov_base + offset, stripe->data + head, tail); } static ec_stripe_t * -ec_get_stripe_from_cache_locked (ec_t *ec, ec_fop_data_t *fop, - uint64_t frag_offset) +ec_get_stripe_from_cache_locked(ec_t *ec, ec_fop_data_t *fop, + uint64_t frag_offset) { - ec_inode_t *ctx = NULL; - ec_stripe_t *stripe = NULL; - ec_stripe_list_t *stripe_cache = NULL; - - ctx = __ec_inode_get (fop->fd->inode, fop->xl); - if (ctx == NULL) { - GF_ATOMIC_INC(ec->stats.stripe_cache.errors); - return NULL; - } + ec_inode_t *ctx = NULL; + ec_stripe_t *stripe = NULL; + ec_stripe_list_t *stripe_cache = NULL; - stripe_cache = &ctx->stripe_cache; - list_for_each_entry (stripe, &stripe_cache->lru, lru) { - if (stripe->frag_offset == frag_offset) { - list_move_tail (&stripe->lru, &stripe_cache->lru); - GF_ATOMIC_INC(ec->stats.stripe_cache.hits); - return stripe; - } + ctx = __ec_inode_get(fop->fd->inode, fop->xl); + if (ctx == NULL) { + GF_ATOMIC_INC(ec->stats.stripe_cache.errors); + return NULL; + } + + stripe_cache = &ctx->stripe_cache; + list_for_each_entry(stripe, &stripe_cache->lru, lru) + { + if (stripe->frag_offset == frag_offset) { + list_move_tail(&stripe->lru, &stripe_cache->lru); + GF_ATOMIC_INC(ec->stats.stripe_cache.hits); + return stripe; } + } - GF_ATOMIC_INC(ec->stats.stripe_cache.misses); + GF_ATOMIC_INC(ec->stats.stripe_cache.misses); - return NULL; + return NULL; } static gf_boolean_t -ec_get_and_merge_stripe (ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which) +ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which) { - uint64_t frag_offset; - ec_stripe_t *stripe = NULL; - gf_boolean_t found = _gf_false; + uint64_t frag_offset; + ec_stripe_t *stripe = NULL; + gf_boolean_t found = _gf_false; - if (!ec->stripe_cache) { - return found; - } + if (!ec->stripe_cache) { + return found; + } - LOCK(&fop->fd->inode->lock); - if (which == EC_STRIPE_HEAD) { - frag_offset = fop->frag_range.first; - stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset); - if (stripe) { - ec_merge_stripe_head_locked (ec, fop, stripe); - found = _gf_true; - } + LOCK(&fop->fd->inode->lock); + if (which == EC_STRIPE_HEAD) { + frag_offset = fop->frag_range.first; + stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset); + if (stripe) { + ec_merge_stripe_head_locked(ec, fop, stripe); + found = _gf_true; } + } - if (which == EC_STRIPE_TAIL) { - frag_offset = fop->frag_range.last - ec->fragment_size; - stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset); - if (stripe) { - ec_merge_stripe_tail_locked (ec, fop, stripe); - found = _gf_true; - } + if (which == EC_STRIPE_TAIL) { + frag_offset = fop->frag_range.last - ec->fragment_size; + stripe = ec_get_stripe_from_cache_locked(ec, 
fop, frag_offset); + if (stripe) { + ec_merge_stripe_tail_locked(ec, fop, stripe); + found = _gf_true; } - UNLOCK(&fop->fd->inode->lock); + } + UNLOCK(&fop->fd->inode->lock); - return found; + return found; } -void ec_writev_start(ec_fop_data_t *fop) +void +ec_writev_start(ec_fop_data_t *fop) { ec_t *ec = fop->xl->private; ec_fd_t *ctx; @@ -2062,40 +2010,39 @@ void ec_writev_start(ec_fop_data_t *fop) goto failed_fd; } if (fop->head > 0) { - found_stripe = ec_get_and_merge_stripe (ec, fop, EC_STRIPE_HEAD); + found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD); if (!found_stripe) { - if (ec_make_internal_fop_xdata (&xdata)) { - err = -ENOMEM; - goto failed_xdata; - } - ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, - ec_writev_merge_head, - NULL, fd, ec->stripe_size, fop->offset, 0, xdata); + if (ec_make_internal_fop_xdata(&xdata)) { + err = -ENOMEM; + goto failed_xdata; + } + ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, + ec_writev_merge_head, NULL, fd, ec->stripe_size, + fop->offset, 0, xdata); } } tail = fop->size - fop->user_size - fop->head; if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) { - /* Current locking scheme will make sure the 'current' below will - * never decrease while the fop is in progress, so the checks will - * work as expected - */ + /* Current locking scheme will make sure the 'current' below will + * never decrease while the fop is in progress, so the checks will + * work as expected + */ if (current > fop->offset + fop->head + fop->user_size) { - found_stripe = ec_get_and_merge_stripe (ec, fop, EC_STRIPE_TAIL); + found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_TAIL); if (!found_stripe) { - if (ec_make_internal_fop_xdata (&xdata)) { - err = -ENOMEM; - goto failed_xdata; - } - ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, - ec_writev_merge_tail, NULL, fd, ec->stripe_size, - fop->offset + fop->size - ec->stripe_size, - 0, xdata); + if (ec_make_internal_fop_xdata(&xdata)) { + err = -ENOMEM; + goto failed_xdata; + } + ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, + ec_writev_merge_tail, NULL, fd, ec->stripe_size, + fop->offset + fop->size - ec->stripe_size, 0, xdata); } } else { memset(fop->vector[0].iov_base + fop->size - tail, 0, tail); if (ec->stripe_cache) { - ec_add_stripe_in_cache (ec, fop); + ec_add_stripe_in_cache(ec, fop); } } } @@ -2112,23 +2059,25 @@ failed: ec_fop_set_error(fop, -err); } -int32_t ec_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prestat, - struct iatt *poststat, dict_t *xdata) +int32_t +ec_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prestat, struct iatt *poststat, + dict_t *xdata) { - ec_t *ec = NULL; - if (this && this->private) { - ec = this->private; - if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) { - op_ret = -1; - op_errno = EIO; - } + ec_t *ec = NULL; + if (this && this->private) { + ec = this->private; + if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) { + op_ret = -1; + op_errno = EIO; } - return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, - prestat, poststat, xdata); + } + return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat, + poststat, xdata); } -void ec_wind_writev(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_writev(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -2141,9 +2090,9 @@ void ec_wind_writev(ec_t * ec, ec_fop_data_t * fop, 
int32_t idx) vector[0].iov_len = size; STACK_WIND_COOKIE(fop->frame, ec_writev_cbk, (void *)(uintptr_t)idx, - ec->xl_list[idx], ec->xl_list[idx]->fops->writev, - fop->fd, vector, 1, fop->offset / ec->fragments, - fop->uint32, fop->buffers, fop->xdata); + ec->xl_list[idx], ec->xl_list[idx]->fops->writev, fop->fd, + vector, 1, fop->offset / ec->fragments, fop->uint32, + fop->buffers, fop->xdata); } static void @@ -2161,35 +2110,34 @@ ec_writev_encode(ec_fop_data_t *fop) fop->vector[0].iov_base, blocks); } -int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state) +int32_t +ec_manager_writev(ec_fop_data_t *fop, int32_t state) { ec_cbk_data_t *cbk; ec_fd_t *ctx = NULL; - ec_t *ec = fop->xl->private; + ec_t *ec = fop->xl->private; off_t fl_start = 0; size_t fl_size = LLONG_MAX; - switch (state) - { + switch (state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ctx = ec_fd_get(fop->fd, fop->xl); - if (ctx != NULL) { - if ((ctx->flags & O_APPEND) == 0) { - off_t user_size = 0; - off_t head = 0; - - fl_start = fop->offset; - user_size = iov_length(fop->vector, fop->int32); - head = ec_adjust_offset_down(ec, &fl_start, - _gf_true); - fl_size = user_size + head; - ec_adjust_size_up(ec, &fl_size, _gf_true); - } + ctx = ec_fd_get(fop->fd, fop->xl); + if (ctx != NULL) { + if ((ctx->flags & O_APPEND) == 0) { + off_t user_size = 0; + off_t head = 0; + + fl_start = fop->offset; + user_size = iov_length(fop->vector, fop->int32); + head = ec_adjust_offset_down(ec, &fl_start, _gf_true); + fl_size = user_size + head; + ec_adjust_size_up(ec, &fl_size, _gf_true); } + } ec_lock_prepare_fd(fop, fop->fd, - EC_UPDATE_DATA | EC_UPDATE_META | - EC_QUERY_INFO, fl_start, fl_size); + EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, + fl_start, fl_size); ec_lock(fop); return EC_STATE_DISPATCH; @@ -2217,29 +2165,28 @@ int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state) ec_t *ec = fop->xl->private; size_t size; - ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, - cbk->count); + ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count); /* This shouldn't fail because we have the inode locked. */ LOCK(&fop->fd->inode->lock); { - GF_ASSERT(__ec_get_inode_size(fop, fop->fd->inode, - &cbk->iatt[0].ia_size)); - cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; - size = fop->offset + fop->head + fop->user_size; - if (size > cbk->iatt[0].ia_size) { - /* Only update inode size if this is a top level fop. - * Otherwise this is an internal write and the top - * level fop should take care of the real inode size. - */ - if (fop->parent == NULL) { - /* This shouldn't fail because we have the inode - * locked. */ - GF_ASSERT(__ec_set_inode_size(fop, - fop->fd->inode, size)); - } - cbk->iatt[1].ia_size = size; + GF_ASSERT(__ec_get_inode_size(fop, fop->fd->inode, + &cbk->iatt[0].ia_size)); + cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; + size = fop->offset + fop->head + fop->user_size; + if (size > cbk->iatt[0].ia_size) { + /* Only update inode size if this is a top level fop. + * Otherwise this is an internal write and the top + * level fop should take care of the real inode size. + */ + if (fop->parent == NULL) { + /* This shouldn't fail because we have the inode + * locked. 
*/ + GF_ASSERT( + __ec_set_inode_size(fop, fop->fd->inode, size)); } + cbk->iatt[1].ia_size = size; + } } UNLOCK(&fop->fd->inode->lock); @@ -2263,8 +2210,7 @@ int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.writev != NULL) - { + if (fop->cbks.writev != NULL) { fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); @@ -2278,7 +2224,7 @@ int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state) fop->frame->root->uid = fop->uid; fop->frame->root->gid = fop->gid; - /* Fall through */ + /* Fall through */ case -EC_STATE_INIT: case -EC_STATE_LOCK: @@ -2287,8 +2233,7 @@ int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.writev != NULL) - { + if (fop->cbks.writev != NULL) { fop->cbks.writev(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, NULL, NULL); } @@ -2308,25 +2253,24 @@ int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_writev_cbk_t func, void * data, fd_t * fd, - struct iovec * vector, int32_t count, off_t offset, - uint32_t flags, struct iobref * iobref, dict_t * xdata) +void +ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - ec_cbk_t callback = { .writev = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.writev = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(WRITE) %p", frame); + gf_msg_trace("ec", 0, "EC(WRITE) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -2348,10 +2292,9 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -2359,10 +2302,9 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target, if (count > 0) { fop->vector = iov_dup(vector, count); if (fop->vector == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a " - "vector list."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a " + "vector list."); goto out; } @@ -2371,10 +2313,9 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target, if (iobref != NULL) { fop->buffers = iobref_ref(iobref); if (fop->buffers == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_BUF_REF_FAIL, - "Failed to reference a " - "buffer."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_BUF_REF_FAIL, + "Failed to reference a " + "buffer."); goto out; } @@ -2382,10 +2323,9 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_copy_with_ref(xdata, 
NULL); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c index d8ad7721f53..47a069b1775 100644 --- a/xlators/cluster/ec/src/ec-locks.c +++ b/xlators/cluster/ec/src/ec-locks.c @@ -19,10 +19,11 @@ #include "ec-messages.h" #define EC_LOCK_MODE_NONE 0 -#define EC_LOCK_MODE_INC 1 -#define EC_LOCK_MODE_ALL 2 +#define EC_LOCK_MODE_INC 1 +#define EC_LOCK_MODE_ALL 2 -int32_t ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) +int32_t +ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) { ec_t *ec = fop->xl->private; ec_cbk_data_t *ans = NULL; @@ -30,7 +31,8 @@ int32_t ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) uintptr_t locked = 0, notlocked = 0; int32_t error = -1; - list_for_each_entry(ans, &fop->cbk_list, list) { + list_for_each_entry(ans, &fop->cbk_list, list) + { if (ans->op_ret >= 0) { if (locked != 0) { error = EIO; @@ -38,16 +40,16 @@ int32_t ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) locked |= ans->mask; cbk = ans; } else { - if (ans->op_errno == EAGAIN) { - switch (fop->uint32) { - case EC_LOCK_MODE_NONE: - case EC_LOCK_MODE_ALL: - /* Goal is to treat non-blocking lock as failure - * even if there is a single EAGAIN*/ - notlocked |= ans->mask; - break; - } + if (ans->op_errno == EAGAIN) { + switch (fop->uint32) { + case EC_LOCK_MODE_NONE: + case EC_LOCK_MODE_ALL: + /* Goal is to treat non-blocking lock as failure + * even if there is a single EAGAIN*/ + notlocked |= ans->mask; + break; } + } } } @@ -63,24 +65,24 @@ int32_t ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) error = 0; } else { switch (fop->uint32) { - case EC_LOCK_MODE_NONE: - error = EAGAIN; - break; + case EC_LOCK_MODE_NONE: + error = EAGAIN; + break; - case EC_LOCK_MODE_ALL: - fop->uint32 = EC_LOCK_MODE_INC; - break; + case EC_LOCK_MODE_ALL: + fop->uint32 = EC_LOCK_MODE_INC; + break; - default: - error = EIO; - break; + default: + error = EIO; + break; } } } else { if (fop->answer && fop->answer->op_ret < 0) - error = fop->answer->op_errno; + error = fop->answer->op_errno; else - error = EIO; + error = EIO; } } @@ -89,28 +91,25 @@ int32_t ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) return error; } -int32_t ec_lock_unlocked(call_frame_t * frame, void * cookie, - xlator_t * this, int32_t op_ret, int32_t op_errno, - dict_t * xdata) +int32_t +ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - if (op_ret < 0) - { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - EC_MSG_UNLOCK_FAILED, - "Failed to unlock an entry/inode"); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED, + "Failed to unlock an entry/inode"); } return 0; } -int32_t ec_lock_lk_unlocked(call_frame_t * frame, void * cookie, - xlator_t * this, int32_t op_ret, int32_t op_errno, - struct gf_flock * flock, dict_t * xdata) +int32_t +ec_lock_lk_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *flock, + dict_t *xdata) { - if (op_ret < 0) - { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - EC_MSG_LK_UNLOCK_FAILED, + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_LK_UNLOCK_FAILED, "Failed to unlock an lk"); } @@ -119,11 +118,12 @@ int32_t ec_lock_lk_unlocked(call_frame_t * 
frame, void * cookie, /* FOP: entrylk */ -int32_t ec_entrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -133,20 +133,16 @@ int32_t ec_entrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ENTRYLK, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg(this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, "Failed to reference a " "dictionary."); @@ -158,15 +154,15 @@ int32_t ec_entrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_entrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_entrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -176,20 +172,19 @@ void ec_wind_entrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->entrylk_type, fop->xdata); } -int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_entrylk(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: - if (fop->entrylk_cmd == ENTRYLK_LOCK) - { + if (fop->entrylk_cmd == ENTRYLK_LOCK) { fop->uint32 = EC_LOCK_MODE_ALL; fop->entrylk_cmd = ENTRYLK_LOCK_NB; } - /* Fall through */ + /* Fall through */ case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -201,15 +196,14 @@ int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state) if (fop->entrylk_cmd != ENTRYLK_UNLOCK) { uintptr_t mask; - ec_fop_set_error (fop, ec_lock_check(fop, &mask)); + ec_fop_set_error(fop, ec_lock_check(fop, &mask)); if (fop->error != 0) { if (mask != 0) { if (fop->id == GF_FOP_ENTRYLK) { - ec_entrylk(fop->frame, fop->xl, mask, 1, - ec_lock_unlocked, NULL, fop->str[0], - &fop->loc[0], fop->str[1], - ENTRYLK_UNLOCK, fop->entrylk_type, - fop->xdata); + ec_entrylk( + fop->frame, fop->xl, mask, 1, ec_lock_unlocked, + NULL, fop->str[0], &fop->loc[0], fop->str[1], + ENTRYLK_UNLOCK, fop->entrylk_type, fop->xdata); } else { ec_fentrylk(fop->frame, fop->xl, mask, 1, ec_lock_unlocked, NULL, fop->str[0], @@ -238,18 +232,13 @@ int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->id == GF_FOP_ENTRYLK) - { - if (fop->cbks.entrylk != NULL) - { - fop->cbks.entrylk(fop->req_frame, fop, fop->xl, - cbk->op_ret, cbk->op_errno, cbk->xdata); + if (fop->id == GF_FOP_ENTRYLK) { + if (fop->cbks.entrylk != NULL) { + fop->cbks.entrylk(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->xdata); } - } - else - { - if (fop->cbks.fentrylk != NULL) - { + } else { + if (fop->cbks.fentrylk != NULL) { fop->cbks.fentrylk(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, 
cbk->xdata); } @@ -262,18 +251,13 @@ int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->id == GF_FOP_ENTRYLK) - { - if (fop->cbks.entrylk != NULL) - { + if (fop->id == GF_FOP_ENTRYLK) { + if (fop->cbks.entrylk != NULL) { fop->cbks.entrylk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL); } - } - else - { - if (fop->cbks.fentrylk != NULL) - { + } else { + if (fop->cbks.fentrylk != NULL) { fop->cbks.fentrylk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL); } @@ -282,25 +266,24 @@ int32_t ec_manager_entrylk(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_entrylk_cbk_t func, void * data, - const char * volume, loc_t * loc, const char * basename, - entrylk_cmd cmd, entrylk_type type, dict_t * xdata) +void +ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_entrylk_cbk_t func, void *data, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - ec_cbk_t callback = { .entrylk = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.entrylk = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(ENTRYLK) %p", frame); + gf_msg_trace("ec", 0, "EC(ENTRYLK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -319,18 +302,16 @@ void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (volume != NULL) { fop->str[0] = gf_strdup(volume); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } } if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -338,9 +319,8 @@ void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (basename != NULL) { fop->str[1] = gf_strdup(basename); if (fop->str[1] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } @@ -348,10 +328,9 @@ void ec_entrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -369,11 +348,12 @@ out: /* FOP: fentrylk */ -int32_t ec_fentrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - 
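ec_manager_entrylk() above follows the manager pattern used throughout these files: a fop is driven through EC_STATE_INIT, DISPATCH, PREPARE_ANSWER and REPORT, and the negated state value (-EC_STATE_REPORT) is the same step on the error path. A toy reduction of that pattern, with invented state names and no real fop structure, purely to make the control flow visible:

/* Toy reduction of the ec_manager_*() state machines; names and types are
 * invented, only the control-flow shape matches the real code. */
#include <stdio.h>

enum { ST_END = 0, ST_INIT, ST_DISPATCH, ST_PREPARE, ST_REPORT };

struct toy_fop {
    int error; /* 0 on success, errno-style value otherwise */
};

static int
manager(struct toy_fop *fop, int state)
{
    switch (state) {
        case ST_INIT:
            /* e.g. downgrade a blocking lock to its non-blocking variant
             * before dispatching to every brick */
            /* fall through */
        case ST_DISPATCH:
            return ST_PREPARE; /* wind to subvolumes, then evaluate answers */
        case ST_PREPARE:
            return fop->error ? -ST_REPORT : ST_REPORT;
        case ST_REPORT:
            printf("success callback\n");
            return ST_END;
        case -ST_REPORT:
            printf("failure callback, error=%d\n", fop->error);
            return ST_END;
        default:
            fprintf(stderr, "unhandled state %d\n", state);
            return ST_END;
    }
}

int
main(void)
{
    struct toy_fop ok = {0}, bad = {5};
    int s;

    for (s = ST_INIT; s != ST_END;)
        s = manager(&ok, s);
    for (s = ST_INIT; s != ST_END;)
        s = manager(&bad, s);
    return 0;
}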
ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -383,22 +363,18 @@ int32_t ec_fentrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FENTRYLK, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -408,15 +384,15 @@ int32_t ec_fentrylk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_fentrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -426,24 +402,25 @@ void ec_wind_fentrylk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->entrylk_type, fop->xdata); } -void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_fentrylk_cbk_t func, void * data, - const char * volume, fd_t * fd, const char * basename, - entrylk_cmd cmd, entrylk_type type, dict_t * xdata) +void +ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, + int32_t minimum, fop_fentrylk_cbk_t func, void *data, + const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) { - ec_cbk_t callback = { .fentrylk = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.fentrylk = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FENTRYLK) %p", frame); + gf_msg_trace("ec", 0, "EC(FENTRYLK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, - minimum, ec_wind_fentrylk, ec_manager_entrylk, - callback, data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, minimum, + ec_wind_fentrylk, ec_manager_entrylk, callback, + data); if (fop == NULL) { goto out; } @@ -456,9 +433,8 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (volume != NULL) { fop->str[0] = gf_strdup(volume); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } @@ -466,10 +442,9 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -477,9 +452,8 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (basename != NULL) { fop->str[1] 
= gf_strdup(basename); if (fop->str[1] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } @@ -487,10 +461,9 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -508,11 +481,12 @@ out: /* FOP: inodelk */ -int32_t ec_inodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -522,22 +496,18 @@ int32_t ec_inodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_INODELK, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -547,15 +517,15 @@ int32_t ec_inodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_inodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_inodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -565,24 +535,22 @@ void ec_wind_inodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->xdata); } -int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_inodelk(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: - fop->flock.l_len += ec_adjust_offset_down(fop->xl->private, - &fop->flock.l_start, - _gf_true); + fop->flock.l_len += ec_adjust_offset_down( + fop->xl->private, &fop->flock.l_start, _gf_true); ec_adjust_offset_up(fop->xl->private, &fop->flock.l_len, _gf_true); - if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) - { + if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) { fop->uint32 = EC_LOCK_MODE_ALL; fop->int32 = F_SETLK; } - /* Fall through */ + /* Fall through */ case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -594,7 +562,7 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state) if (fop->flock.l_type != F_UNLCK) { uintptr_t mask; - ec_fop_set_error (fop, ec_lock_check(fop, &mask)); + ec_fop_set_error(fop, ec_lock_check(fop, &mask)); if (fop->error != 0) { if (mask != 0) { ec_t *ec = fop->xl->private; @@ -641,18 +609,13 @@ int32_t 
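The EC_STATE_INIT case of ec_manager_inodelk() above widens the requested byte range before dispatching: ec_adjust_offset_down() rounds l_start down and returns the slack, which is added to l_len, and ec_adjust_offset_up() then rounds l_len up, so the lock covers every stripe the original range touches. A simplified arithmetic sketch follows; it ignores the l_len == 0 "to end of file" case and any fragment-unit scaling the real helpers may also perform:

/* Sketch of the lock-range widening in EC_STATE_INIT: round the start down
 * and the length up to stripe boundaries so the whole affected region is
 * covered. */
#include <inttypes.h>
#include <stdio.h>

static void
widen_to_stripes(uint64_t *start, uint64_t *len, uint64_t stripe)
{
    uint64_t head = *start % stripe; /* slack before the requested range */

    *start -= head; /* round the start down */
    *len += head;   /* keep the same end point */
    if (*len % stripe != 0)
        *len += stripe - (*len % stripe); /* round the length up */
}

int
main(void)
{
    uint64_t start = 1000, len = 300, stripe = 512;

    widen_to_stripes(&start, &len, stripe);
    /* Expected: start=512 len=1024 (covers original bytes 1000..1299) */
    printf("start=%" PRIu64 " len=%" PRIu64 "\n", start, len);
    return 0;
}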
ec_manager_inodelk(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->id == GF_FOP_INODELK) - { - if (fop->cbks.inodelk != NULL) - { - fop->cbks.inodelk(fop->req_frame, fop, fop->xl, - cbk->op_ret, cbk->op_errno, cbk->xdata); + if (fop->id == GF_FOP_INODELK) { + if (fop->cbks.inodelk != NULL) { + fop->cbks.inodelk(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->xdata); } - } - else - { - if (fop->cbks.finodelk != NULL) - { + } else { + if (fop->cbks.finodelk != NULL) { fop->cbks.finodelk(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, cbk->xdata); } @@ -665,18 +628,13 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->id == GF_FOP_INODELK) - { - if (fop->cbks.inodelk != NULL) - { + if (fop->id == GF_FOP_INODELK) { + if (fop->cbks.inodelk != NULL) { fop->cbks.inodelk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL); } - } - else - { - if (fop->cbks.finodelk != NULL) - { + } else { + if (fop->cbks.finodelk != NULL) { fop->cbks.finodelk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL); } @@ -685,25 +643,24 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state) return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_inodelk (call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, - uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, - void *data, const char *volume, loc_t *loc, int32_t cmd, - struct gf_flock *flock, dict_t *xdata) +void +ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, + void *data, const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { - ec_cbk_t callback = { .inodelk = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.inodelk = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(INODELK) %p", frame); + gf_msg_trace("ec", 0, "EC(INODELK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -717,23 +674,21 @@ void ec_inodelk (call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, } fop->int32 = cmd; - ec_owner_copy (fop->frame, owner); + ec_owner_copy(fop->frame, owner); if (volume != NULL) { fop->str[0] = gf_strdup(volume); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } } if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_LOC_COPY_FAIL, - "Failed to copy a location."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL, + "Failed to copy a location."); goto out; } @@ -753,10 +708,9 @@ void ec_inodelk (call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -774,11 +728,12 @@ out: /* FOP: finodelk */ 
-int32_t ec_finodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, dict_t * xdata) +int32_t +ec_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -788,22 +743,18 @@ int32_t ec_finodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FINODELK, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (xdata != NULL) - { + if (cbk != NULL) { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -813,15 +764,15 @@ int32_t ec_finodelk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_finodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -831,24 +782,25 @@ void ec_wind_finodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->xdata); } -void ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, - uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, - void *data, const char *volume, fd_t *fd, int32_t cmd, - struct gf_flock *flock, dict_t *xdata) +void +ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, + void *data, const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { - ec_cbk_t callback = { .finodelk = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.finodelk = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(FINODELK) %p", frame); + gf_msg_trace("ec", 0, "EC(FINODELK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, - minimum, ec_wind_finodelk, ec_manager_inodelk, - callback, data); + fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, minimum, + ec_wind_finodelk, ec_manager_inodelk, callback, + data); if (fop == NULL) { goto out; } @@ -856,14 +808,13 @@ void ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, fop->use_fd = 1; fop->int32 = cmd; - ec_owner_copy (fop->frame, owner); + ec_owner_copy(fop->frame, owner); if (volume != NULL) { fop->str[0] = gf_strdup(volume); if (fop->str[0] == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, - "Failed to duplicate a string."); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to duplicate a string."); goto out; } @@ -871,10 +822,9 @@ void ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == 
NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -894,10 +844,9 @@ void ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -915,15 +864,13 @@ out: /* FOP: lk */ -int32_t ec_combine_lk(ec_fop_data_t * fop, ec_cbk_data_t * dst, - ec_cbk_data_t * src) +int32_t +ec_combine_lk(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src) { - if (!ec_flock_compare(&dst->flock, &src->flock)) - { - gf_msg (fop->xl->name, GF_LOG_NOTICE, 0, - EC_MSG_LOCK_MISMATCH, - "Mismatching lock in " - "answers of 'GF_FOP_LK'"); + if (!ec_flock_compare(&dst->flock, &src->flock)) { + gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_LOCK_MISMATCH, + "Mismatching lock in " + "answers of 'GF_FOP_LK'"); return 0; } @@ -931,12 +878,12 @@ int32_t ec_combine_lk(ec_fop_data_t * fop, ec_cbk_data_t * dst, return 1; } -int32_t ec_lk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, - int32_t op_ret, int32_t op_errno, struct gf_flock * flock, - dict_t * xdata) +int32_t +ec_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_flock *flock, dict_t *xdata) { - ec_fop_data_t * fop = NULL; - ec_cbk_data_t * cbk = NULL; + ec_fop_data_t *fop = NULL; + ec_cbk_data_t *cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; VALIDATE_OR_GOTO(this, out); @@ -946,39 +893,32 @@ int32_t ec_lk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, fop = frame->local; - ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, - frame, op_ret, op_errno); + ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame, + op_ret, op_errno); cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LK, idx, op_ret, op_errno); - if (cbk != NULL) - { - if (op_ret >= 0) - { - if (flock != NULL) - { + if (cbk != NULL) { + if (op_ret >= 0) { + if (flock != NULL) { cbk->flock.l_type = flock->l_type; cbk->flock.l_whence = flock->l_whence; cbk->flock.l_start = flock->l_start; cbk->flock.l_len = flock->l_len; cbk->flock.l_pid = flock->l_pid; cbk->flock.l_owner.len = flock->l_owner.len; - if (flock->l_owner.len > 0) - { + if (flock->l_owner.len > 0) { memcpy(cbk->flock.l_owner.data, flock->l_owner.data, flock->l_owner.len); } } } - if (xdata != NULL) - { + if (xdata != NULL) { cbk->xdata = dict_ref(xdata); - if (cbk->xdata == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + if (cbk->xdata == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } @@ -988,15 +928,15 @@ int32_t ec_lk_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } out: - if (fop != NULL) - { + if (fop != NULL) { ec_complete(fop); } return 0; } -void ec_wind_lk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +void +ec_wind_lk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) { ec_trace("WIND", fop, "idx=%d", idx); @@ -1005,20 +945,19 @@ void ec_wind_lk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->int32, &fop->flock, fop->xdata); } -int32_t 
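ec_combine_lk() above is what lets GF_FOP_LK answers from different bricks be grouped: replies are only combined when the locks they report are identical, otherwise the mismatch is logged and the answers stay separate. A self-contained illustration using the standard struct flock (the real ec_flock_compare() works on gf_flock and also compares the lock owner, which plain struct flock does not carry):

/* Combine two lock replies only if they describe the same lock. */
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>

static bool
flock_equal(const struct flock *a, const struct flock *b)
{
    return a->l_type == b->l_type && a->l_whence == b->l_whence &&
           a->l_start == b->l_start && a->l_len == b->l_len &&
           a->l_pid == b->l_pid;
}

int
main(void)
{
    struct flock x = {.l_type = F_WRLCK,
                      .l_whence = SEEK_SET,
                      .l_start = 0,
                      .l_len = 4096,
                      .l_pid = 1234};
    struct flock y = x;

    printf("identical replies: %s\n",
           flock_equal(&x, &y) ? "combine" : "mismatch");
    y.l_len = 8192;
    printf("different replies: %s\n",
           flock_equal(&x, &y) ? "combine" : "mismatch");
    return 0;
}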
ec_manager_lk(ec_fop_data_t * fop, int32_t state) +int32_t +ec_manager_lk(ec_fop_data_t *fop, int32_t state) { - ec_cbk_data_t * cbk; + ec_cbk_data_t *cbk; - switch (state) - { + switch (state) { case EC_STATE_INIT: - if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) - { + if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) { fop->uint32 = EC_LOCK_MODE_ALL; fop->int32 = F_SETLK; } - /* Fall through */ + /* Fall through */ case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -1030,7 +969,7 @@ int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state) if (fop->flock.l_type != F_UNLCK) { uintptr_t mask; - ec_fop_set_error (fop, ec_lock_check(fop, &mask)); + ec_fop_set_error(fop, ec_lock_check(fop, &mask)); if (fop->error != 0) { if (mask != 0) { struct gf_flock flock = {0}; @@ -1040,11 +979,10 @@ int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state) flock.l_start = fop->flock.l_start; flock.l_len = fop->flock.l_len; flock.l_pid = fop->flock.l_pid; - lk_owner_copy (&flock.l_owner, &fop->flock.l_owner); + lk_owner_copy(&flock.l_owner, &fop->flock.l_owner); - ec_lk(fop->frame, fop->xl, mask, 1, - ec_lock_lk_unlocked, NULL, fop->fd, F_SETLK, - &flock, fop->xdata); + ec_lk(fop->frame, fop->xl, mask, 1, ec_lock_lk_unlocked, + NULL, fop->fd, F_SETLK, &flock, fop->xdata); } if (fop->error < 0) { @@ -1068,8 +1006,7 @@ int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state) GF_ASSERT(cbk != NULL); - if (fop->cbks.lk != NULL) - { + if (fop->cbks.lk != NULL) { fop->cbks.lk(fop->req_frame, fop, fop->xl, cbk->op_ret, cbk->op_errno, &cbk->flock, cbk->xdata); } @@ -1081,34 +1018,31 @@ int32_t ec_manager_lk(ec_fop_data_t * fop, int32_t state) case -EC_STATE_REPORT: GF_ASSERT(fop->error != 0); - if (fop->cbks.lk != NULL) - { - fop->cbks.lk(fop->req_frame, fop, fop->xl, -1, fop->error, - NULL, NULL); + if (fop->cbks.lk != NULL) { + fop->cbks.lk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL, + NULL); } - return EC_STATE_END; default: - gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, - EC_MSG_UNHANDLED_STATE, - "Unhandled state %d for %s", - state, ec_fop_name(fop->id)); + gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE, + "Unhandled state %d for %s", state, ec_fop_name(fop->id)); return EC_STATE_END; } } -void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_lk_cbk_t func, void * data, fd_t * fd, - int32_t cmd, struct gf_flock * flock, dict_t * xdata) +void +ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { - ec_cbk_t callback = { .lk = func }; - ec_fop_data_t * fop = NULL; + ec_cbk_t callback = {.lk = func}; + ec_fop_data_t *fop = NULL; int32_t error = ENOMEM; - gf_msg_trace ("ec", 0, "EC(LK) %p", frame); + gf_msg_trace("ec", 0, "EC(LK) %p", frame); VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -1127,10 +1061,9 @@ void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (fd != NULL) { fop->fd = fd_ref(fd); if (fop->fd == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_FILE_DESC_REF_FAIL, - "Failed to reference a " - "file descriptor."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL, + "Failed to reference a " + "file descriptor."); goto out; } @@ -1150,10 +1083,9 @@ void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target, if (xdata != NULL) { fop->xdata = dict_ref(xdata); if (fop->xdata == NULL) { - gf_msg (this->name, 
GF_LOG_ERROR, 0, - EC_MSG_DICT_REF_FAIL, - "Failed to reference a " - "dictionary."); + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL, + "Failed to reference a " + "dictionary."); goto out; } diff --git a/xlators/cluster/ec/src/ec-method.c b/xlators/cluster/ec/src/ec-method.c index fdc89391bfb..3aff6b096bd 100644 --- a/xlators/cluster/ec/src/ec-method.c +++ b/xlators/cluster/ec/src/ec-method.c @@ -88,11 +88,9 @@ ec_method_matrix_init(ec_matrix_list_t *list, ec_matrix_t *matrix, matrix->rows); for (i = 0; i < matrix->rows; i++) { matrix->row_data[i].values = matrix->values + i * matrix->columns; - matrix->row_data[i].func.interleaved = - ec_code_build_interleaved(matrix->code, - EC_METHOD_WORD_SIZE, - matrix->row_data[i].values, - matrix->columns); + matrix->row_data[i].func.interleaved = ec_code_build_interleaved( + matrix->code, EC_METHOD_WORD_SIZE, matrix->row_data[i].values, + matrix->columns); } } else { matrix->rows = list->rows; @@ -100,10 +98,9 @@ ec_method_matrix_init(ec_matrix_list_t *list, ec_matrix_t *matrix, matrix->columns, rows, matrix->rows); for (i = 0; i < matrix->rows; i++) { matrix->row_data[i].values = matrix->values + i * matrix->columns; - matrix->row_data[i].func.linear = - ec_code_build_linear(matrix->code, EC_METHOD_WORD_SIZE, - matrix->row_data[i].values, - matrix->columns); + matrix->row_data[i].func.linear = ec_code_build_linear( + matrix->code, EC_METHOD_WORD_SIZE, matrix->row_data[i].values, + matrix->columns); } } } @@ -266,8 +263,8 @@ ec_method_setup(xlator_t *xl, ec_matrix_list_t *list, const char *gen) int32_t err; matrix = GF_MALLOC(sizeof(ec_matrix_t) + - sizeof(ec_matrix_row_t) * list->rows + - sizeof(uint32_t) * list->columns * list->rows, + sizeof(ec_matrix_row_t) * list->rows + + sizeof(uint32_t) * list->columns * list->rows, ec_mt_ec_matrix_t); if (matrix == NULL) { err = -ENOMEM; @@ -310,9 +307,10 @@ ec_method_init(xlator_t *xl, ec_matrix_list_t *list, uint32_t columns, INIT_LIST_HEAD(&list->lru); int32_t err; - list->pool = mem_pool_new_fn(xl->ctx, sizeof(ec_matrix_t) + - sizeof(ec_matrix_row_t) * columns + - sizeof(uint32_t) * columns * columns, + list->pool = mem_pool_new_fn(xl->ctx, + sizeof(ec_matrix_t) + + sizeof(ec_matrix_row_t) * columns + + sizeof(uint32_t) * columns * columns, 128, "ec_matrix_t"); if (list->pool == NULL) { err = -ENOMEM; @@ -370,8 +368,8 @@ ec_method_fini(ec_matrix_list_t *list) GF_ASSERT(list->count == 0); - if (list->pool)/*Init was successful*/ - LOCK_DESTROY(&list->lock); + if (list->pool) /*Init was successful*/ + LOCK_DESTROY(&list->lock); ec_method_matrix_release(list->encode); GF_FREE(list->encode); @@ -402,9 +400,8 @@ ec_method_encode(ec_matrix_list_t *list, size_t size, void *in, void **out) matrix = list->encode; for (pos = 0; pos < size; pos += list->stripe) { for (i = 0; i < matrix->rows; i++) { - matrix->row_data[i].func.linear(out[i], in, pos, - matrix->row_data[i].values, - list->columns); + matrix->row_data[i].func.linear( + out[i], in, pos, matrix->row_data[i].values, list->columns); out[i] += EC_METHOD_CHUNK_SIZE; } } @@ -424,9 +421,8 @@ ec_method_decode(ec_matrix_list_t *list, size_t size, uintptr_t mask, } for (pos = 0; pos < size; pos += EC_METHOD_CHUNK_SIZE) { for (i = 0; i < matrix->rows; i++) { - matrix->row_data[i].func.interleaved(out, in, pos, - matrix->row_data[i].values, - list->columns); + matrix->row_data[i].func.interleaved( + out, in, pos, matrix->row_data[i].values, list->columns); out += EC_METHOD_CHUNK_SIZE; } } diff --git a/xlators/cluster/ec/src/ec.c 
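The ec-method.c hunks above only rewrap the calls into the generated coding kernels (func.linear for encoding, func.interleaved for decoding), but the loop shape is the interesting part: the buffer is consumed one stripe at a time, and each output fragment is row i of the coding matrix applied to that stripe. The sketch below is deliberately simplified; the real code runs over a Galois field with machine-generated SIMD kernels, whereas this toy uses 0/1 coefficients and XOR so it stays self-contained:

/* Toy encode loop: per stripe, fragment i = row i of the matrix applied to
 * the data words.  XOR over 0/1 coefficients stands in for the real
 * Galois-field multiply-accumulate kernels. */
#include <stdint.h>
#include <stdio.h>

#define COLUMNS 3 /* data words per stripe */
#define ROWS 5    /* fragments produced per stripe */

static void
encode_stripe(const uint8_t matrix[ROWS][COLUMNS], const uint8_t in[COLUMNS],
              uint8_t out[ROWS])
{
    for (int i = 0; i < ROWS; i++) {
        uint8_t acc = 0;

        for (int j = 0; j < COLUMNS; j++) {
            if (matrix[i][j])
                acc ^= in[j];
        }
        out[i] = acc;
    }
}

int
main(void)
{
    /* Toy coding matrix; GlusterFS generates its own matrices elsewhere. */
    const uint8_t m[ROWS][COLUMNS] = {
        {1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {1, 1, 0}, {0, 1, 1},
    };
    const uint8_t stripe[COLUMNS] = {0x11, 0x22, 0x33};
    uint8_t frags[ROWS];
    int i;

    encode_stripe(m, stripe, frags);
    for (i = 0; i < ROWS; i++)
        printf("fragment %d: 0x%02x\n", i, frags[i]);
    return 0;
}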
b/xlators/cluster/ec/src/ec.c index a82305104c5..0350325d6fb 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -26,47 +26,45 @@ #include "events.h" static char *ec_read_policies[EC_READ_POLICY_MAX + 1] = { - [EC_ROUND_ROBIN] = "round-robin", - [EC_GFID_HASH] = "gfid-hash", - [EC_READ_POLICY_MAX] = NULL -}; + [EC_ROUND_ROBIN] = "round-robin", + [EC_GFID_HASH] = "gfid-hash", + [EC_READ_POLICY_MAX] = NULL}; #define EC_INTERNAL_XATTR_OR_GOTO(name, xattr, op_errno, label) \ - do { \ - if (ec_is_internal_xattr (NULL, (char *)name, NULL, NULL)) { \ - op_errno = EPERM; \ - goto label; \ - } \ - if (name && (strlen (name) == 0) && xattr) { \ - /* Bulk [f]removexattr/[f]setxattr */ \ - GF_IF_INTERNAL_XATTR_GOTO (EC_XATTR_PREFIX"*", xattr, \ - op_errno, label); \ - } \ - } while (0) - -int32_t ec_parse_options(xlator_t * this) -{ - ec_t * ec = this->private; + do { \ + if (ec_is_internal_xattr(NULL, (char *)name, NULL, NULL)) { \ + op_errno = EPERM; \ + goto label; \ + } \ + if (name && (strlen(name) == 0) && xattr) { \ + /* Bulk [f]removexattr/[f]setxattr */ \ + GF_IF_INTERNAL_XATTR_GOTO(EC_XATTR_PREFIX "*", xattr, op_errno, \ + label); \ + } \ + } while (0) + +int32_t +ec_parse_options(xlator_t *this) +{ + ec_t *ec = this->private; int32_t error = EINVAL; uintptr_t mask; GF_OPTION_INIT("redundancy", ec->redundancy, int32, out); ec->fragments = ec->nodes - ec->redundancy; if ((ec->redundancy < 1) || (ec->redundancy >= ec->fragments) || - (ec->fragments > EC_MAX_FRAGMENTS)) - { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_INVALID_REDUNDANCY, - "Invalid redundancy (must be between " - "1 and %d)", (ec->nodes - 1) / 2); + (ec->fragments > EC_MAX_FRAGMENTS)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_REDUNDANCY, + "Invalid redundancy (must be between " + "1 and %d)", + (ec->nodes - 1) / 2); goto out; } ec->bits_for_nodes = 1; mask = 2; - while (ec->nodes > mask) - { + while (ec->nodes > mask) { ec->bits_for_nodes++; mask <<= 1; } @@ -74,9 +72,10 @@ int32_t ec_parse_options(xlator_t * this) ec->fragment_size = EC_METHOD_CHUNK_SIZE; ec->stripe_size = ec->fragment_size * ec->fragments; - gf_msg_debug ("ec", 0, "Initialized with: nodes=%u, fragments=%u, " - "stripe_size=%u, node_mask=%lX", - ec->nodes, ec->fragments, ec->stripe_size, ec->node_mask); + gf_msg_debug("ec", 0, + "Initialized with: nodes=%u, fragments=%u, " + "stripe_size=%u, node_mask=%lX", + ec->nodes, ec->fragments, ec->stripe_size, ec->node_mask); error = 0; @@ -84,30 +83,28 @@ out: return error; } -int32_t ec_prepare_childs(xlator_t * this) +int32_t +ec_prepare_childs(xlator_t *this) { - ec_t * ec = this->private; - xlator_list_t * child = NULL; + ec_t *ec = this->private; + xlator_list_t *child = NULL; int32_t count = 0; - for (child = this->children; child != NULL; child = child->next) - { + for (child = this->children; child != NULL; child = child->next) { count++; } - if (count > EC_MAX_NODES) - { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_TOO_MANY_SUBVOLS, "Too many subvolumes"); + if (count > EC_MAX_NODES) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_TOO_MANY_SUBVOLS, + "Too many subvolumes"); return EINVAL; } ec->nodes = count; ec->xl_list = GF_CALLOC(count, sizeof(ec->xl_list[0]), ec_mt_xlator_t); - if (ec->xl_list == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Allocation of xlator list failed"); + if (ec->xl_list == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Allocation of xlator list failed"); return 
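ec_parse_options() above derives the whole volume geometry from two numbers: fragments = nodes - redundancy, one fragment is EC_METHOD_CHUNK_SIZE bytes, the stripe is fragment_size * fragments, and bits_for_nodes is the smallest width able to index a brick. A quick numeric check for a common 4+2 layout; the 512-byte chunk size is an assumption for the example, not taken from this patch:

/* Geometry arithmetic from ec_parse_options(), evaluated for 6 bricks with
 * redundancy 2.  The 512-byte fragment size is assumed for illustration. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    unsigned nodes = 6, redundancy = 2;
    unsigned fragments = nodes - redundancy;          /* 4 */
    unsigned fragment_size = 512;                     /* assumed chunk size */
    unsigned stripe_size = fragment_size * fragments; /* 2048 */
    unsigned bits_for_nodes = 1;
    uintptr_t mask = 2;

    /* redundancy must stay in [1, fragments) for the volume to make sense */
    if (redundancy < 1 || redundancy >= fragments)
        return 1;

    while (nodes > mask) { /* smallest bit width that can index a brick */
        bits_for_nodes++;
        mask <<= 1;
    }

    /* Expected: fragments=4 stripe=2048 bits_for_nodes=3 */
    printf("fragments=%u stripe=%u bits_for_nodes=%u\n", fragments,
           stripe_size, bits_for_nodes);
    return 0;
}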
ENOMEM; } @@ -115,8 +112,7 @@ int32_t ec_prepare_childs(xlator_t * this) ec->xl_up_count = 0; count = 0; - for (child = this->children; child != NULL; child = child->next) - { + for (child = this->children; child != NULL; child = child->next) { ec->xl_list[count++] = child->xlator; } @@ -125,43 +121,42 @@ int32_t ec_prepare_childs(xlator_t * this) /* This function transforms the subvol to subvol-id*/ static int -_subvol_to_subvolid (dict_t *this, char *key, data_t *value, void *data) -{ - ec_t *ec = data; - xlator_t *subvol = NULL; - int i = 0; - int ret = -1; - - subvol = data_to_ptr (value); - for (i = 0; i < ec->nodes; i++) { - if (ec->xl_list[i] == subvol) { - ret = dict_set_int32 (this, key, i); - /* -1 stops dict_foreach and returns -1*/ - if (ret < 0) - ret = -1; - goto out; - } +_subvol_to_subvolid(dict_t *this, char *key, data_t *value, void *data) +{ + ec_t *ec = data; + xlator_t *subvol = NULL; + int i = 0; + int ret = -1; + + subvol = data_to_ptr(value); + for (i = 0; i < ec->nodes; i++) { + if (ec->xl_list[i] == subvol) { + ret = dict_set_int32(this, key, i); + /* -1 stops dict_foreach and returns -1*/ + if (ret < 0) + ret = -1; + goto out; } + } out: - return ret; + return ret; } int -ec_subvol_to_subvol_id_transform (ec_t *ec, dict_t *leaf_to_subvolid) +ec_subvol_to_subvol_id_transform(ec_t *ec, dict_t *leaf_to_subvolid) { - return dict_foreach (leaf_to_subvolid, _subvol_to_subvolid, ec); + return dict_foreach(leaf_to_subvolid, _subvol_to_subvolid, ec); } -void __ec_destroy_private(xlator_t * this) +void +__ec_destroy_private(xlator_t *this) { - ec_t * ec = this->private; + ec_t *ec = this->private; - if (ec != NULL) - { + if (ec != NULL) { LOCK(&ec->lock); - if (ec->timer != NULL) - { + if (ec->timer != NULL) { gf_timer_call_cancel(this->ctx, ec->timer); ec->timer = NULL; } @@ -179,31 +174,27 @@ void __ec_destroy_private(xlator_t * this) sleep(2); this->private = NULL; - if (ec->xl_list != NULL) - { + if (ec->xl_list != NULL) { GF_FREE(ec->xl_list); ec->xl_list = NULL; } - if (ec->fop_pool != NULL) - { + if (ec->fop_pool != NULL) { mem_pool_destroy(ec->fop_pool); } - if (ec->cbk_pool != NULL) - { + if (ec->cbk_pool != NULL) { mem_pool_destroy(ec->cbk_pool); } - if (ec->lock_pool != NULL) - { + if (ec->lock_pool != NULL) { mem_pool_destroy(ec->lock_pool); } LOCK_DESTROY(&ec->lock); if (ec->leaf_to_subvolid) - dict_unref (ec->leaf_to_subvolid); + dict_unref(ec->leaf_to_subvolid); ec_method_fini(&ec->matrix); @@ -211,13 +202,13 @@ void __ec_destroy_private(xlator_t * this) } } -int32_t mem_acct_init(xlator_t * this) +int32_t +mem_acct_init(xlator_t *this) { - if (xlator_mem_acct_init(this, ec_mt_end + 1) != 0) - { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Memory accounting initialization " - "failed."); + if (xlator_mem_acct_init(this, ec_mt_end + 1) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Memory accounting initialization " + "failed."); return -1; } @@ -226,442 +217,435 @@ int32_t mem_acct_init(xlator_t * this) } void -ec_configure_background_heal_opts (ec_t *ec, int background_heals, - int heal_wait_qlen) +ec_configure_background_heal_opts(ec_t *ec, int background_heals, + int heal_wait_qlen) { - if (background_heals == 0) { - ec->heal_wait_qlen = 0; - } else { - ec->heal_wait_qlen = heal_wait_qlen; - } - ec->background_heals = background_heals; + if (background_heals == 0) { + ec->heal_wait_qlen = 0; + } else { + ec->heal_wait_qlen = heal_wait_qlen; + } + ec->background_heals = background_heals; } int 
-ec_assign_read_policy (ec_t *ec, char *read_policy) +ec_assign_read_policy(ec_t *ec, char *read_policy) { - int read_policy_idx = -1; + int read_policy_idx = -1; - read_policy_idx = gf_get_index_by_elem (ec_read_policies, read_policy); - if (read_policy_idx < 0 || read_policy_idx >= EC_READ_POLICY_MAX) - return -1; + read_policy_idx = gf_get_index_by_elem(ec_read_policies, read_policy); + if (read_policy_idx < 0 || read_policy_idx >= EC_READ_POLICY_MAX) + return -1; - ec->read_policy = read_policy_idx; - return 0; + ec->read_policy = read_policy_idx; + return 0; } int32_t -reconfigure (xlator_t *this, dict_t *options) -{ - ec_t *ec = this->private; - char *read_policy = NULL; - char *extensions = NULL; - uint32_t heal_wait_qlen = 0; - uint32_t background_heals = 0; - int32_t ret = -1; - int32_t err; - - GF_OPTION_RECONF ("cpu-extensions", extensions, options, str, failed); - - GF_OPTION_RECONF ("self-heal-daemon", ec->shd.enabled, options, bool, - failed); - GF_OPTION_RECONF ("iam-self-heal-daemon", ec->shd.iamshd, options, - bool, failed); - GF_OPTION_RECONF ("eager-lock", ec->eager_lock, options, - bool, failed); - GF_OPTION_RECONF ("other-eager-lock", ec->other_eager_lock, options, - bool, failed); - GF_OPTION_RECONF ("eager-lock-timeout", ec->eager_lock_timeout, - options, uint32, failed); - GF_OPTION_RECONF ("other-eager-lock-timeout", - ec->other_eager_lock_timeout, options, uint32, - failed); - GF_OPTION_RECONF ("background-heals", background_heals, options, - uint32, failed); - GF_OPTION_RECONF ("heal-wait-qlength", heal_wait_qlen, options, - uint32, failed); - GF_OPTION_RECONF ("self-heal-window-size", ec->self_heal_window_size, - options, uint32, failed); - GF_OPTION_RECONF ("heal-timeout", ec->shd.timeout, options, - int32, failed); - ec_configure_background_heal_opts (ec, background_heals, - heal_wait_qlen); - GF_OPTION_RECONF ("shd-max-threads", ec->shd.max_threads, - options, uint32, failed); - GF_OPTION_RECONF ("shd-wait-qlength", ec->shd.wait_qlength, - options, uint32, failed); - - GF_OPTION_RECONF ("read-policy", read_policy, options, str, failed); - - GF_OPTION_RECONF ("optimistic-change-log", ec->optimistic_changelog, - options, bool, failed); - GF_OPTION_RECONF ("parallel-writes", ec->parallel_writes, - options, bool, failed); - GF_OPTION_RECONF ("stripe-cache", ec->stripe_cache, options, uint32, - failed); - ret = 0; - if (ec_assign_read_policy (ec, read_policy)) { - ret = -1; - } +reconfigure(xlator_t *this, dict_t *options) +{ + ec_t *ec = this->private; + char *read_policy = NULL; + char *extensions = NULL; + uint32_t heal_wait_qlen = 0; + uint32_t background_heals = 0; + int32_t ret = -1; + int32_t err; - err = ec_method_update(this, &ec->matrix, extensions); - if (err != 0) { - ret = -1; - } + GF_OPTION_RECONF("cpu-extensions", extensions, options, str, failed); + + GF_OPTION_RECONF("self-heal-daemon", ec->shd.enabled, options, bool, + failed); + GF_OPTION_RECONF("iam-self-heal-daemon", ec->shd.iamshd, options, bool, + failed); + GF_OPTION_RECONF("eager-lock", ec->eager_lock, options, bool, failed); + GF_OPTION_RECONF("other-eager-lock", ec->other_eager_lock, options, bool, + failed); + GF_OPTION_RECONF("eager-lock-timeout", ec->eager_lock_timeout, options, + uint32, failed); + GF_OPTION_RECONF("other-eager-lock-timeout", ec->other_eager_lock_timeout, + options, uint32, failed); + GF_OPTION_RECONF("background-heals", background_heals, options, uint32, + failed); + GF_OPTION_RECONF("heal-wait-qlength", heal_wait_qlen, options, uint32, + failed); + 
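ec_assign_read_policy() above validates the read-policy string by locating it in the NULL-terminated ec_read_policies table and refusing anything it cannot find, so a typo in the option fails the (re)configure instead of silently picking a default. The same idea without the gf_get_index_by_elem() helper, with the table contents copied from the patch and the lookup loop written by hand:

/* String-to-index lookup over a NULL-terminated policy table. */
#include <stdio.h>
#include <string.h>

enum { READ_ROUND_ROBIN, READ_GFID_HASH, READ_POLICY_MAX };

static const char *read_policies[READ_POLICY_MAX + 1] = {
    [READ_ROUND_ROBIN] = "round-robin",
    [READ_GFID_HASH] = "gfid-hash",
    [READ_POLICY_MAX] = NULL,
};

static int
assign_read_policy(const char *name)
{
    for (int i = 0; read_policies[i] != NULL; i++) {
        if (strcmp(read_policies[i], name) == 0)
            return i; /* valid policy index */
    }
    return -1; /* unknown name: caller fails the (re)configure */
}

int
main(void)
{
    printf("gfid-hash -> %d\n", assign_read_policy("gfid-hash"));
    printf("bogus     -> %d\n", assign_read_policy("bogus"));
    return 0;
}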
GF_OPTION_RECONF("self-heal-window-size", ec->self_heal_window_size, + options, uint32, failed); + GF_OPTION_RECONF("heal-timeout", ec->shd.timeout, options, int32, failed); + ec_configure_background_heal_opts(ec, background_heals, heal_wait_qlen); + GF_OPTION_RECONF("shd-max-threads", ec->shd.max_threads, options, uint32, + failed); + GF_OPTION_RECONF("shd-wait-qlength", ec->shd.wait_qlength, options, uint32, + failed); + + GF_OPTION_RECONF("read-policy", read_policy, options, str, failed); + + GF_OPTION_RECONF("optimistic-change-log", ec->optimistic_changelog, options, + bool, failed); + GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool, + failed); + GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed); + ret = 0; + if (ec_assign_read_policy(ec, read_policy)) { + ret = -1; + } + + err = ec_method_update(this, &ec->matrix, extensions); + if (err != 0) { + ret = -1; + } failed: - return ret; + return ret; } glusterfs_event_t -ec_get_event_from_state (ec_t *ec) -{ - int down_count = 0; - - if (ec->xl_up_count >= ec->fragments) { - /* If ec is up but some subvolumes are yet to notify, give - * grace time for other subvols to notify to prevent start of - * I/O which may result in self-heals */ - if (ec->xl_notify_count < ec->nodes) - return GF_EVENT_MAXVAL; - - return GF_EVENT_CHILD_UP; - } else { - down_count = ec->xl_notify_count - ec->xl_up_count; - if (down_count > ec->redundancy) - return GF_EVENT_CHILD_DOWN; - } +ec_get_event_from_state(ec_t *ec) +{ + int down_count = 0; + + if (ec->xl_up_count >= ec->fragments) { + /* If ec is up but some subvolumes are yet to notify, give + * grace time for other subvols to notify to prevent start of + * I/O which may result in self-heals */ + if (ec->xl_notify_count < ec->nodes) + return GF_EVENT_MAXVAL; + + return GF_EVENT_CHILD_UP; + } else { + down_count = ec->xl_notify_count - ec->xl_up_count; + if (down_count > ec->redundancy) + return GF_EVENT_CHILD_DOWN; + } - return GF_EVENT_MAXVAL; + return GF_EVENT_MAXVAL; } void -ec_up (xlator_t *this, ec_t *ec) +ec_up(xlator_t *this, ec_t *ec) { - if (ec->timer != NULL) { - gf_timer_call_cancel (this->ctx, ec->timer); - ec->timer = NULL; - } + if (ec->timer != NULL) { + gf_timer_call_cancel(this->ctx, ec->timer); + ec->timer = NULL; + } - ec->up = 1; - gf_msg (this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP"); + ec->up = 1; + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP"); - gf_event (EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name); + gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name); } void -ec_down (xlator_t *this, ec_t *ec) +ec_down(xlator_t *this, ec_t *ec) { - if (ec->timer != NULL) { - gf_timer_call_cancel(this->ctx, ec->timer); - ec->timer = NULL; - } + if (ec->timer != NULL) { + gf_timer_call_cancel(this->ctx, ec->timer); + ec->timer = NULL; + } - ec->up = 0; - gf_msg (this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN"); + ec->up = 0; + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN"); - gf_event (EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name); + gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name); } void -ec_notify_cbk (void *data) +ec_notify_cbk(void *data) { - ec_t *ec = data; - glusterfs_event_t event = GF_EVENT_MAXVAL; - gf_boolean_t propagate = _gf_false; + ec_t *ec = data; + glusterfs_event_t event = GF_EVENT_MAXVAL; + gf_boolean_t propagate = _gf_false; - LOCK(&ec->lock); - { - if (!ec->timer) { - /* - * Either child_up/child_down is already sent to parent - * This is a spurious 
wake up. - */ - goto unlock; - } - - gf_timer_call_cancel (ec->xl->ctx, ec->timer); - ec->timer = NULL; - - /* The timeout has expired, so any subvolume that has not - * already reported its state, will be considered to be down. - * We mark as if all bricks had reported. */ - ec->xl_notify = (1ULL << ec->nodes) - 1ULL; - ec->xl_notify_count = ec->nodes; - - /* Since we have marked all subvolumes as notified, it's - * guaranteed that ec_get_event_from_state() will return - * CHILD_UP or CHILD_DOWN, but not MAXVAL. */ - event = ec_get_event_from_state (ec); - if (event == GF_EVENT_CHILD_UP) { - /* We are ready to bring the volume up. If there are - * still bricks DOWN, they will be healed when they - * come up. */ - ec_up (ec->xl, ec); - } - - propagate = _gf_true; + LOCK(&ec->lock); + { + if (!ec->timer) { + /* + * Either child_up/child_down is already sent to parent + * This is a spurious wake up. + */ + goto unlock; + } + + gf_timer_call_cancel(ec->xl->ctx, ec->timer); + ec->timer = NULL; + + /* The timeout has expired, so any subvolume that has not + * already reported its state, will be considered to be down. + * We mark as if all bricks had reported. */ + ec->xl_notify = (1ULL << ec->nodes) - 1ULL; + ec->xl_notify_count = ec->nodes; + + /* Since we have marked all subvolumes as notified, it's + * guaranteed that ec_get_event_from_state() will return + * CHILD_UP or CHILD_DOWN, but not MAXVAL. */ + event = ec_get_event_from_state(ec); + if (event == GF_EVENT_CHILD_UP) { + /* We are ready to bring the volume up. If there are + * still bricks DOWN, they will be healed when they + * come up. */ + ec_up(ec->xl, ec); } -unlock: - UNLOCK(&ec->lock); - if (propagate) { - if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) { - /* We have just brought the volume UP, so we trigger - * a self-heal check on the root directory. */ - ec_launch_replace_heal (ec); - } + propagate = _gf_true; + } +unlock: + UNLOCK(&ec->lock); - default_notify (ec->xl, event, NULL); + if (propagate) { + if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) { + /* We have just brought the volume UP, so we trigger + * a self-heal check on the root directory. 
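ec_notify_cbk() and ec_get_event_from_state() above encode the availability rule of a dispersed volume: it is usable once at least 'fragments' bricks are up, although the CHILD_UP event is held back until every brick has reported so that I/O does not start and immediately require self-heal, and it is declared down once more than 'redundancy' of the reporting bricks are down. A compact restatement of that rule with stand-in types:

/* Availability decision of a dispersed volume, restated with toy types. */
#include <stdio.h>

enum child_event { EV_NONE, EV_CHILD_UP, EV_CHILD_DOWN };

struct ec_state {
    int nodes, fragments, redundancy;
    int up_count, notify_count; /* bricks up / bricks that have reported */
};

static enum child_event
event_from_state(const struct ec_state *s)
{
    if (s->up_count >= s->fragments) {
        /* Enough bricks for I/O, but wait until every brick has reported
         * before going UP, so we do not trigger avoidable self-heals. */
        if (s->notify_count < s->nodes)
            return EV_NONE;
        return EV_CHILD_UP;
    }
    if (s->notify_count - s->up_count > s->redundancy)
        return EV_CHILD_DOWN;
    return EV_NONE; /* not decided yet */
}

int
main(void)
{
    struct ec_state s = {.nodes = 6,
                         .fragments = 4,
                         .redundancy = 2,
                         .up_count = 4,
                         .notify_count = 5};

    printf("4 up, 5 reported -> %d\n", event_from_state(&s)); /* 0: wait */
    s.notify_count = 6;
    printf("4 up, 6 reported -> %d\n", event_from_state(&s)); /* 1: up   */
    s.up_count = 1;
    s.notify_count = 4;
    printf("1 up, 4 reported -> %d\n", event_from_state(&s)); /* 2: down */
    return 0;
}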
*/ + ec_launch_replace_heal(ec); } + default_notify(ec->xl, event, NULL); + } } void -ec_launch_notify_timer (xlator_t *this, ec_t *ec) -{ - struct timespec delay = {0, }; - - gf_msg_debug (this->name, 0, "Initiating child-down timer"); - delay.tv_sec = 10; - delay.tv_nsec = 0; - ec->timer = gf_timer_call_after (this->ctx, delay, ec_notify_cbk, ec); - if (ec->timer == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_TIMER_CREATE_FAIL, "Cannot create timer " - "for delayed initialization"); - } +ec_launch_notify_timer(xlator_t *this, ec_t *ec) +{ + struct timespec delay = { + 0, + }; + + gf_msg_debug(this->name, 0, "Initiating child-down timer"); + delay.tv_sec = 10; + delay.tv_nsec = 0; + ec->timer = gf_timer_call_after(this->ctx, delay, ec_notify_cbk, ec); + if (ec->timer == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_TIMER_CREATE_FAIL, + "Cannot create timer " + "for delayed initialization"); + } } gf_boolean_t ec_disable_delays(ec_t *ec) { - ec->shutdown = _gf_true; + ec->shutdown = _gf_true; - return list_empty (&ec->pending_fops); + return list_empty(&ec->pending_fops); } void ec_pending_fops_completed(ec_t *ec) { - if (ec->shutdown) { - default_notify (ec->xl, GF_EVENT_PARENT_DOWN, NULL); - } + if (ec->shutdown) { + default_notify(ec->xl, GF_EVENT_PARENT_DOWN, NULL); + } } static gf_boolean_t ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state) { - uintptr_t current_state = 0; + uintptr_t current_state = 0; - if ((ec->xl_notify & index_mask) == 0) { - ec->xl_notify |= index_mask; - ec->xl_notify_count++; - } - current_state = ec->xl_up & index_mask; - if (current_state != new_state) { - ec->xl_up ^= index_mask; - ec->xl_up_count += (current_state ? -1 : 1); + if ((ec->xl_notify & index_mask) == 0) { + ec->xl_notify |= index_mask; + ec->xl_notify_count++; + } + current_state = ec->xl_up & index_mask; + if (current_state != new_state) { + ec->xl_up ^= index_mask; + ec->xl_up_count += (current_state ? -1 : 1); - return _gf_true; - } + return _gf_true; + } - return _gf_false; + return _gf_false; } static gf_boolean_t ec_upcall(ec_t *ec, struct gf_upcall *upcall) { - struct gf_upcall_cache_invalidation *ci = NULL; - struct gf_upcall_inodelk_contention *lc = NULL; - inode_t *inode; + struct gf_upcall_cache_invalidation *ci = NULL; + struct gf_upcall_inodelk_contention *lc = NULL; + inode_t *inode; - switch (upcall->event_type) { + switch (upcall->event_type) { case GF_UPCALL_CACHE_INVALIDATION: - ci = upcall->data; - ci->flags |= UP_INVAL_ATTR; - return _gf_true; + ci = upcall->data; + ci->flags |= UP_INVAL_ATTR; + return _gf_true; case GF_UPCALL_INODELK_CONTENTION: - lc = upcall->data; - if (strcmp(lc->domain, ec->xl->name) != 0) { - /* The lock is not owned by EC, ignore it. */ - return _gf_true; - } - inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable, - upcall->gfid); - /* If inode is not found, it means that it's already released, - * so we can ignore it. Probably it has been released and - * destroyed while the contention notification was being sent. - */ - if (inode != NULL) { - ec_lock_release(ec, inode); - inode_unref(inode); - } - - return _gf_false; + lc = upcall->data; + if (strcmp(lc->domain, ec->xl->name) != 0) { + /* The lock is not owned by EC, ignore it. */ + return _gf_true; + } + inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable, + upcall->gfid); + /* If inode is not found, it means that it's already released, + * so we can ignore it. 
Probably it has been released and + * destroyed while the contention notification was being sent. + */ + if (inode != NULL) { + ec_lock_release(ec, inode); + inode_unref(inode); + } + + return _gf_false; default: - return _gf_true; - } + return _gf_true; + } } int32_t -ec_notify (xlator_t *this, int32_t event, void *data, void *data2) -{ - ec_t *ec = this->private; - int32_t idx = 0; - int32_t error = 0; - glusterfs_event_t old_event = GF_EVENT_MAXVAL; - dict_t *input = NULL; - dict_t *output = NULL; - gf_boolean_t propagate = _gf_true; - gf_boolean_t needs_shd_check = _gf_false; - int32_t orig_event = event; - uintptr_t mask = 0; - - gf_msg_trace (this->name, 0, "NOTIFY(%d): %p, %p", - event, data, data2); - - if (event == GF_EVENT_UPCALL) { - propagate = ec_upcall(ec, data); - goto done; +ec_notify(xlator_t *this, int32_t event, void *data, void *data2) +{ + ec_t *ec = this->private; + int32_t idx = 0; + int32_t error = 0; + glusterfs_event_t old_event = GF_EVENT_MAXVAL; + dict_t *input = NULL; + dict_t *output = NULL; + gf_boolean_t propagate = _gf_true; + gf_boolean_t needs_shd_check = _gf_false; + int32_t orig_event = event; + uintptr_t mask = 0; + + gf_msg_trace(this->name, 0, "NOTIFY(%d): %p, %p", event, data, data2); + + if (event == GF_EVENT_UPCALL) { + propagate = ec_upcall(ec, data); + goto done; + } + + if (event == GF_EVENT_TRANSLATOR_OP) { + if (!ec->up) { + error = -1; + } else { + input = data; + output = data2; + error = ec_xl_op(this, input, output); } + goto out; + } - if (event == GF_EVENT_TRANSLATOR_OP) { - if (!ec->up) { - error = -1; - } else { - input = data; - output = data2; - error = ec_xl_op (this, input, output); - } - goto out; + for (idx = 0; idx < ec->nodes; idx++) { + if (ec->xl_list[idx] == data) { + break; } + } - for (idx = 0; idx < ec->nodes; idx++) { - if (ec->xl_list[idx] == data) { - break; - } + LOCK(&ec->lock); + + if (event == GF_EVENT_PARENT_UP) { + /* + * Start a timer which sends appropriate event to parent + * xlator to prevent the 'mount' syscall from hanging. + */ + ec_launch_notify_timer(this, ec); + goto unlock; + } else if (event == GF_EVENT_PARENT_DOWN) { + /* If there aren't pending fops running after we have waken up + * them, we immediately propagate the notification. */ + propagate = ec_disable_delays(ec); + goto unlock; + } + + if (idx < ec->nodes) { /* CHILD_* events */ + old_event = ec_get_event_from_state(ec); + + mask = 1ULL << idx; + if (event == GF_EVENT_CHILD_UP) { + /* We need to trigger a selfheal if a brick changes + * to UP state. */ + needs_shd_check = ec_set_up_state(ec, mask, mask); + } else if (event == GF_EVENT_CHILD_DOWN) { + ec_set_up_state(ec, mask, 0); } - LOCK (&ec->lock); - - if (event == GF_EVENT_PARENT_UP) { - /* - * Start a timer which sends appropriate event to parent - * xlator to prevent the 'mount' syscall from hanging. - */ - ec_launch_notify_timer (this, ec); - goto unlock; - } else if (event == GF_EVENT_PARENT_DOWN) { - /* If there aren't pending fops running after we have waken up - * them, we immediately propagate the notification. */ - propagate = ec_disable_delays(ec); - goto unlock; + event = ec_get_event_from_state(ec); + + if (event == GF_EVENT_CHILD_UP) { + if (!ec->up) { + ec_up(this, ec); + } + } else { + /* If the volume is not UP, it's irrelevant if one + * brick has come up. We cannot heal anything. 
*/ + needs_shd_check = _gf_false; + + if ((event == GF_EVENT_CHILD_DOWN) && ec->up) { + ec_down(this, ec); + } } - if (idx < ec->nodes) { /* CHILD_* events */ - old_event = ec_get_event_from_state (ec); - - mask = 1ULL << idx; - if (event == GF_EVENT_CHILD_UP) { - /* We need to trigger a selfheal if a brick changes - * to UP state. */ - needs_shd_check = ec_set_up_state(ec, mask, mask); - } else if (event == GF_EVENT_CHILD_DOWN) { - ec_set_up_state(ec, mask, 0); - } - - event = ec_get_event_from_state (ec); - - if (event == GF_EVENT_CHILD_UP) { - if (!ec->up) { - ec_up (this, ec); - } - } else { - /* If the volume is not UP, it's irrelevant if one - * brick has come up. We cannot heal anything. */ - needs_shd_check = _gf_false; - - if ((event == GF_EVENT_CHILD_DOWN) && ec->up) { - ec_down (this, ec); - } - } - - if (event != GF_EVENT_MAXVAL) { - if (event == old_event) { - if (orig_event == GF_EVENT_CHILD_UP) - event = GF_EVENT_SOME_DESCENDENT_UP; - else /* orig_event has to be GF_EVENT_CHILD_DOWN */ - event = GF_EVENT_SOME_DESCENDENT_DOWN; - } - } else { - propagate = _gf_false; - } + if (event != GF_EVENT_MAXVAL) { + if (event == old_event) { + if (orig_event == GF_EVENT_CHILD_UP) + event = GF_EVENT_SOME_DESCENDENT_UP; + else /* orig_event has to be GF_EVENT_CHILD_DOWN */ + event = GF_EVENT_SOME_DESCENDENT_DOWN; + } + } else { + propagate = _gf_false; } + } unlock: - UNLOCK (&ec->lock); + UNLOCK(&ec->lock); done: - if (propagate) { - if (needs_shd_check && ec->shd.iamshd) { - ec_launch_replace_heal (ec); - } - - error = default_notify (this, event, data); + if (propagate) { + if (needs_shd_check && ec->shd.iamshd) { + ec_launch_replace_heal(ec); } + error = default_notify(this, event, data); + } + out: - return error; + return error; } int32_t -notify (xlator_t *this, int32_t event, void *data, ...) +notify(xlator_t *this, int32_t event, void *data, ...) 
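ec_set_up_state() and the 1ULL << idx mask in ec_notify() above keep two bitmaps per volume, one bit per subvolume: 'notify' records whether a brick has reported at all and 'up' records its current state, alongside cached counts of each. The toy below keeps the same bookkeeping and returns whether the brick actually changed state; the names are stand-ins, not the real fields:

/* Per-brick bookkeeping: one bit per subvolume plus cached popcounts.
 * Returns true only when the up/down state of that brick changed. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct brick_state {
    uintptr_t notify, up;
    int notify_count, up_count;
};

static bool
set_up_state(struct brick_state *s, uintptr_t mask, bool now_up)
{
    uintptr_t new_state = now_up ? mask : 0;

    if ((s->notify & mask) == 0) { /* first event from this brick */
        s->notify |= mask;
        s->notify_count++;
    }
    if ((s->up & mask) != new_state) { /* up <-> down transition */
        s->up ^= mask;
        s->up_count += now_up ? 1 : -1;
        return true;
    }
    return false;
}

int
main(void)
{
    struct brick_state s = {0, 0, 0, 0};

    printf("brick 0 up:   changed=%d\n", set_up_state(&s, 1ULL << 0, true));
    printf("brick 0 up:   changed=%d\n", set_up_state(&s, 1ULL << 0, true));
    printf("brick 0 down: changed=%d\n", set_up_state(&s, 1ULL << 0, false));
    printf("up=%d notified=%d\n", s.up_count, s.notify_count);
    return 0;
}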
{ - int ret = -1; - va_list ap; - void *data2 = NULL; + int ret = -1; + va_list ap; + void *data2 = NULL; - va_start (ap, data); - data2 = va_arg (ap, dict_t*); - va_end (ap); - ret = ec_notify (this, event, data, data2); + va_start(ap, data); + data2 = va_arg(ap, dict_t *); + va_end(ap); + ret = ec_notify(this, event, data, data2); - return ret; + return ret; } static void ec_statistics_init(ec_t *ec) { - GF_ATOMIC_INIT(ec->stats.stripe_cache.hits, 0); - GF_ATOMIC_INIT(ec->stats.stripe_cache.misses, 0); - GF_ATOMIC_INIT(ec->stats.stripe_cache.updates, 0); - GF_ATOMIC_INIT(ec->stats.stripe_cache.invals, 0); - GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0); - GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0); - GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0); + GF_ATOMIC_INIT(ec->stats.stripe_cache.hits, 0); + GF_ATOMIC_INIT(ec->stats.stripe_cache.misses, 0); + GF_ATOMIC_INIT(ec->stats.stripe_cache.updates, 0); + GF_ATOMIC_INIT(ec->stats.stripe_cache.invals, 0); + GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0); + GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0); + GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0); } int32_t -init (xlator_t *this) +init(xlator_t *this) { - ec_t *ec = NULL; + ec_t *ec = NULL; char *read_policy = NULL; - char *extensions = NULL; + char *extensions = NULL; int32_t err; - if (this->parents == NULL) - { - gf_msg (this->name, GF_LOG_WARNING, 0, - EC_MSG_NO_PARENTS, "Volume does not have parents."); + if (this->parents == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS, + "Volume does not have parents."); } ec = GF_MALLOC(sizeof(*ec), ec_mt_ec_t); - if (ec == NULL) - { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to allocate private memory."); + if (ec == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to allocate private memory."); return -1; } @@ -680,26 +664,23 @@ init (xlator_t *this) ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096); ec->lock_pool = mem_pool_new(ec_lock_t, 1024); if ((ec->fop_pool == NULL) || (ec->cbk_pool == NULL) || - (ec->lock_pool == NULL)) - { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - EC_MSG_NO_MEMORY, "Failed to create memory pools."); + (ec->lock_pool == NULL)) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY, + "Failed to create memory pools."); goto failed; } - if (ec_prepare_childs(this) != 0) - { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_XLATOR_INIT_FAIL, "Failed to initialize xlator"); + if (ec_prepare_childs(this) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, + "Failed to initialize xlator"); goto failed; } - if (ec_parse_options(this) != 0) - { - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - EC_MSG_XLATOR_PARSE_OPT_FAIL, "Failed to parse xlator options"); + if (ec_parse_options(this) != 0) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_XLATOR_PARSE_OPT_FAIL, + "Failed to parse xlator options"); goto failed; } @@ -709,58 +690,59 @@ init (xlator_t *this) err = ec_method_init(this, &ec->matrix, ec->fragments, ec->nodes, ec->nodes * 2, extensions); if (err != 0) { - gf_msg (this->name, GF_LOG_ERROR, -err, EC_MSG_MATRIX_FAILED, - "Failed to initialize matrix management"); + gf_msg(this->name, GF_LOG_ERROR, -err, EC_MSG_MATRIX_FAILED, + "Failed to initialize matrix management"); goto failed; } - GF_OPTION_INIT ("self-heal-daemon", ec->shd.enabled, bool, failed); - GF_OPTION_INIT ("iam-self-heal-daemon", ec->shd.iamshd, bool, failed); - GF_OPTION_INIT ("eager-lock", ec->eager_lock, bool, failed); - 
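The notify() wrapper above is the usual xlator trick for carrying an optional second payload: the entry point is variadic and the extra pointer (a dict_t in the real code) is extracted with va_arg before handing off to ec_notify(). A minimal standalone version of the pattern, with toy names; this toy always supplies the extra argument so the va_arg call stays well-defined:

/* Variadic wrapper that peels off one optional payload pointer. */
#include <stdarg.h>
#include <stdio.h>

static int
handle_event(int event, void *data, void *data2)
{
    printf("event=%d data=%p data2=%p\n", event, data, data2);
    return 0;
}

static int
toy_notify(int event, void *data, ...)
{
    va_list ap;
    void *data2 = NULL;

    va_start(ap, data);
    data2 = va_arg(ap, void *); /* callers in this toy always pass one */
    va_end(ap);

    return handle_event(event, data, data2);
}

int
main(void)
{
    int a = 1, b = 2;

    return toy_notify(42, &a, &b);
}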
GF_OPTION_INIT ("other-eager-lock", ec->other_eager_lock, bool, failed); - GF_OPTION_INIT ("eager-lock-timeout", ec->eager_lock_timeout, uint32, - failed); - GF_OPTION_INIT ("other-eager-lock-timeout", ec->other_eager_lock_timeout, - uint32, failed); - GF_OPTION_INIT ("background-heals", ec->background_heals, uint32, failed); - GF_OPTION_INIT ("heal-wait-qlength", ec->heal_wait_qlen, uint32, failed); - GF_OPTION_INIT ("self-heal-window-size", ec->self_heal_window_size, uint32, - failed); - ec_configure_background_heal_opts (ec, ec->background_heals, - ec->heal_wait_qlen); - GF_OPTION_INIT ("read-policy", read_policy, str, failed); - if (ec_assign_read_policy (ec, read_policy)) - goto failed; - - GF_OPTION_INIT ("shd-max-threads", ec->shd.max_threads, uint32, failed); - GF_OPTION_INIT ("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed); - GF_OPTION_INIT ("optimistic-change-log", ec->optimistic_changelog, bool, failed); - GF_OPTION_INIT ("parallel-writes", ec->parallel_writes, bool, failed); - GF_OPTION_INIT ("stripe-cache", ec->stripe_cache, uint32, failed); - - this->itable = inode_table_new (EC_SHD_INODE_LRU_LIMIT, this); + GF_OPTION_INIT("self-heal-daemon", ec->shd.enabled, bool, failed); + GF_OPTION_INIT("iam-self-heal-daemon", ec->shd.iamshd, bool, failed); + GF_OPTION_INIT("eager-lock", ec->eager_lock, bool, failed); + GF_OPTION_INIT("other-eager-lock", ec->other_eager_lock, bool, failed); + GF_OPTION_INIT("eager-lock-timeout", ec->eager_lock_timeout, uint32, + failed); + GF_OPTION_INIT("other-eager-lock-timeout", ec->other_eager_lock_timeout, + uint32, failed); + GF_OPTION_INIT("background-heals", ec->background_heals, uint32, failed); + GF_OPTION_INIT("heal-wait-qlength", ec->heal_wait_qlen, uint32, failed); + GF_OPTION_INIT("self-heal-window-size", ec->self_heal_window_size, uint32, + failed); + ec_configure_background_heal_opts(ec, ec->background_heals, + ec->heal_wait_qlen); + GF_OPTION_INIT("read-policy", read_policy, str, failed); + if (ec_assign_read_policy(ec, read_policy)) + goto failed; + + GF_OPTION_INIT("shd-max-threads", ec->shd.max_threads, uint32, failed); + GF_OPTION_INIT("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed); + GF_OPTION_INIT("optimistic-change-log", ec->optimistic_changelog, bool, + failed); + GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed); + GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed); + + this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this); if (!this->itable) - goto failed; + goto failed; if (ec->shd.iamshd) - ec_selfheal_daemon_init (this); - gf_msg_debug (this->name, 0, "Disperse translator initialized."); + ec_selfheal_daemon_init(this); + gf_msg_debug(this->name, 0, "Disperse translator initialized."); - ec->leaf_to_subvolid = dict_new (); + ec->leaf_to_subvolid = dict_new(); if (!ec->leaf_to_subvolid) - goto failed; - if (glusterfs_reachable_leaves (this, ec->leaf_to_subvolid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_SUBVOL_BUILD_FAIL, "Failed to build subvol " - "dictionary"); + goto failed; + if (glusterfs_reachable_leaves(this, ec->leaf_to_subvolid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_SUBVOL_BUILD_FAIL, + "Failed to build subvol " + "dictionary"); goto failed; } - if (ec_subvol_to_subvol_id_transform (ec, ec->leaf_to_subvolid) < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - EC_MSG_SUBVOL_ID_DICT_SET_FAIL, "Failed to build subvol-id " - "dictionary"); + if (ec_subvol_to_subvol_id_transform(ec, ec->leaf_to_subvolid) < 0) { + gf_msg(this->name, GF_LOG_ERROR, 
0, EC_MSG_SUBVOL_ID_DICT_SET_FAIL, + "Failed to build subvol-id " + "dictionary"); goto failed; } @@ -774,13 +756,15 @@ failed: return -1; } -void fini(xlator_t * this) +void +fini(xlator_t *this) { __ec_destroy_private(this); } -int32_t ec_gf_access(call_frame_t * frame, xlator_t * this, loc_t * loc, - int32_t mask, dict_t * xdata) +int32_t +ec_gf_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) { ec_access(frame, this, -1, EC_MINIMUM_ONE, default_access_cbk, NULL, loc, mask, xdata); @@ -788,9 +772,9 @@ int32_t ec_gf_access(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc, - int32_t flags, mode_t mode, mode_t umask, fd_t * fd, - dict_t * xdata) +int32_t +ec_gf_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ec_create(frame, this, -1, EC_MINIMUM_MIN, default_create_cbk, NULL, loc, flags, mode, umask, fd, xdata); @@ -798,53 +782,56 @@ int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_discard(call_frame_t * frame, xlator_t * this, fd_t * fd, - off_t offset, size_t len, dict_t * xdata) +int32_t +ec_gf_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - ec_discard(frame, this, -1, EC_MINIMUM_MIN, default_discard_cbk, - NULL, fd, offset, len, xdata); + ec_discard(frame, this, -1, EC_MINIMUM_MIN, default_discard_cbk, NULL, fd, + offset, len, xdata); return 0; } -int32_t ec_gf_entrylk(call_frame_t * frame, xlator_t * this, - const char * volume, loc_t * loc, const char * basename, - entrylk_cmd cmd, entrylk_type type, dict_t * xdata) +int32_t +ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) { int32_t minimum = EC_MINIMUM_ALL; if (cmd == ENTRYLK_UNLOCK) - minimum = EC_MINIMUM_ONE; - ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, - volume, loc, basename, cmd, type, xdata); + minimum = EC_MINIMUM_ONE; + ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, volume, loc, + basename, cmd, type, xdata); return 0; } -int32_t ec_gf_fentrylk(call_frame_t * frame, xlator_t * this, - const char * volume, fd_t * fd, const char * basename, - entrylk_cmd cmd, entrylk_type type, dict_t * xdata) +int32_t +ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) { int32_t minimum = EC_MINIMUM_ALL; if (cmd == ENTRYLK_UNLOCK) - minimum = EC_MINIMUM_ONE; - ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, - volume, fd, basename, cmd, type, xdata); + minimum = EC_MINIMUM_ONE; + ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, volume, + fd, basename, cmd, type, xdata); return 0; } -int32_t ec_gf_fallocate(call_frame_t * frame, xlator_t * this, fd_t * fd, - int32_t mode, off_t offset, size_t len, - dict_t * xdata) +int32_t +ec_gf_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - ec_fallocate(frame, this, -1, EC_MINIMUM_MIN, default_fallocate_cbk, - NULL, fd, mode, offset, len, xdata); + ec_fallocate(frame, this, -1, EC_MINIMUM_MIN, default_fallocate_cbk, NULL, + fd, mode, offset, len, xdata); return 0; } -int32_t ec_gf_flush(call_frame_t * frame, xlator_t * this, fd_t * fd, - dict_t * 
xdata) +int32_t +ec_gf_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ec_flush(frame, this, -1, EC_MINIMUM_MIN, default_flush_cbk, NULL, fd, xdata); @@ -852,8 +839,9 @@ int32_t ec_gf_flush(call_frame_t * frame, xlator_t * this, fd_t * fd, return 0; } -int32_t ec_gf_fsync(call_frame_t * frame, xlator_t * this, fd_t * fd, - int32_t datasync, dict_t * xdata) +int32_t +ec_gf_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { ec_fsync(frame, this, -1, EC_MINIMUM_MIN, default_fsync_cbk, NULL, fd, datasync, xdata); @@ -861,114 +849,114 @@ int32_t ec_gf_fsync(call_frame_t * frame, xlator_t * this, fd_t * fd, return 0; } -int32_t ec_gf_fsyncdir(call_frame_t * frame, xlator_t * this, fd_t * fd, - int32_t datasync, dict_t * xdata) +int32_t +ec_gf_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { - ec_fsyncdir(frame, this, -1, EC_MINIMUM_MIN, default_fsyncdir_cbk, NULL, - fd, datasync, xdata); + ec_fsyncdir(frame, this, -1, EC_MINIMUM_MIN, default_fsyncdir_cbk, NULL, fd, + datasync, xdata); return 0; } int -ec_marker_populate_args (call_frame_t *frame, int type, int *gauge, - xlator_t **subvols) +ec_marker_populate_args(call_frame_t *frame, int type, int *gauge, + xlator_t **subvols) { - xlator_t *this = frame->this; - ec_t *ec = this->private; + xlator_t *this = frame->this; + ec_t *ec = this->private; - memcpy (subvols, ec->xl_list, sizeof (*subvols) * ec->nodes); + memcpy(subvols, ec->xl_list, sizeof(*subvols) * ec->nodes); - if (type == MARKER_XTIME_TYPE) { - /*Don't error out on ENOENT/ENOTCONN */ - gauge[MCNT_NOTFOUND] = 0; - gauge[MCNT_ENOTCONN] = 0; - } + if (type == MARKER_XTIME_TYPE) { + /*Don't error out on ENOENT/ENOTCONN */ + gauge[MCNT_NOTFOUND] = 0; + gauge[MCNT_ENOTCONN] = 0; + } - return ec->nodes; + return ec->nodes; } int32_t -ec_handle_heal_commands (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +ec_handle_heal_commands(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - dict_t *dict_rsp = NULL; - int op_ret = -1; - int op_errno = ENOMEM; + dict_t *dict_rsp = NULL; + int op_ret = -1; + int op_errno = ENOMEM; - if (!name || strcmp (name, GF_HEAL_INFO)) - return -1; + if (!name || strcmp(name, GF_HEAL_INFO)) + return -1; - op_errno = -ec_get_heal_info (this, loc, &dict_rsp); - if (op_errno <= 0) { - op_errno = op_ret = 0; - } + op_errno = -ec_get_heal_info(this, loc, &dict_rsp); + if (op_errno <= 0) { + op_errno = op_ret = 0; + } - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict_rsp, NULL); - if (dict_rsp) - dict_unref (dict_rsp); - return 0; + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict_rsp, NULL); + if (dict_rsp) + dict_unref(dict_rsp); + return 0; } int32_t -ec_gf_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int error = 0; - ec_t *ec = this->private; - int32_t minimum = EC_MINIMUM_ONE; + int error = 0; + ec_t *ec = this->private; + int32_t minimum = EC_MINIMUM_ONE; - if (name && strcmp (name, EC_XATTR_HEAL) != 0) { - EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out); - } + if (name && strcmp(name, EC_XATTR_HEAL) != 0) { + EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out); + } - if (ec_handle_heal_commands (frame, this, loc, name, xdata) == 0) - return 0; + if (ec_handle_heal_commands(frame, this, loc, name, xdata) == 0) + 
return 0; - if (cluster_handle_marker_getxattr (frame, loc, name, ec->vol_uuid, - NULL, ec_marker_populate_args) == 0) - return 0; + if (cluster_handle_marker_getxattr(frame, loc, name, ec->vol_uuid, NULL, + ec_marker_populate_args) == 0) + return 0; - if (name && - ((fnmatch (GF_XATTR_STIME_PATTERN, name, 0) == 0) || - XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) { - minimum = EC_MINIMUM_ALL; - } + if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) || + XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) { + minimum = EC_MINIMUM_ALL; + } - ec_getxattr (frame, this, -1, minimum, default_getxattr_cbk, - NULL, loc, name, xdata); + ec_getxattr(frame, this, -1, minimum, default_getxattr_cbk, NULL, loc, name, + xdata); - return 0; + return 0; out: - error = ENODATA; - STACK_UNWIND_STRICT (getxattr, frame, -1, error, NULL, NULL); - return 0; + error = ENODATA; + STACK_UNWIND_STRICT(getxattr, frame, -1, error, NULL, NULL); + return 0; } int32_t -ec_gf_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +ec_gf_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) { - int error = 0; + int error = 0; - EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out); + EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out); - ec_fgetxattr (frame, this, -1, EC_MINIMUM_ONE, default_fgetxattr_cbk, - NULL, fd, name, xdata); - return 0; + ec_fgetxattr(frame, this, -1, EC_MINIMUM_ONE, default_fgetxattr_cbk, NULL, + fd, name, xdata); + return 0; out: - error = ENODATA; - STACK_UNWIND_STRICT (fgetxattr, frame, -1, error, NULL, NULL); - return 0; + error = ENODATA; + STACK_UNWIND_STRICT(fgetxattr, frame, -1, error, NULL, NULL); + return 0; } -int32_t ec_gf_inodelk(call_frame_t * frame, xlator_t * this, - const char * volume, loc_t * loc, int32_t cmd, - struct gf_flock * flock, dict_t * xdata) +int32_t +ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata) { int32_t minimum = EC_MINIMUM_ALL; if (flock->l_type == F_UNLCK) - minimum = EC_MINIMUM_ONE; + minimum = EC_MINIMUM_ONE; ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum, default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata); @@ -976,21 +964,22 @@ int32_t ec_gf_inodelk(call_frame_t * frame, xlator_t * this, return 0; } -int32_t ec_gf_finodelk(call_frame_t * frame, xlator_t * this, - const char * volume, fd_t * fd, int32_t cmd, - struct gf_flock * flock, dict_t * xdata) +int32_t +ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) { int32_t minimum = EC_MINIMUM_ALL; if (flock->l_type == F_UNLCK) - minimum = EC_MINIMUM_ONE; + minimum = EC_MINIMUM_ONE; ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum, default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata); return 0; } -int32_t ec_gf_link(call_frame_t * frame, xlator_t * this, loc_t * oldloc, - loc_t * newloc, dict_t * xdata) +int32_t +ec_gf_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { ec_link(frame, this, -1, EC_MINIMUM_MIN, default_link_cbk, NULL, oldloc, newloc, xdata); @@ -998,20 +987,21 @@ int32_t ec_gf_link(call_frame_t * frame, xlator_t * this, loc_t * oldloc, return 0; } -int32_t ec_gf_lk(call_frame_t * frame, xlator_t * this, fd_t * fd, - int32_t cmd, struct gf_flock * flock, dict_t * xdata) +int32_t +ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + 
struct gf_flock *flock, dict_t *xdata) { int32_t minimum = EC_MINIMUM_ALL; if (flock->l_type == F_UNLCK) - minimum = EC_MINIMUM_ONE; - ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, - flock, xdata); + minimum = EC_MINIMUM_ONE; + ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, flock, + xdata); return 0; } -int32_t ec_gf_lookup(call_frame_t * frame, xlator_t * this, loc_t * loc, - dict_t * xdata) +int32_t +ec_gf_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ec_lookup(frame, this, -1, EC_MINIMUM_MIN, default_lookup_cbk, NULL, loc, xdata); @@ -1019,8 +1009,9 @@ int32_t ec_gf_lookup(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_mkdir(call_frame_t * frame, xlator_t * this, loc_t * loc, - mode_t mode, mode_t umask, dict_t * xdata) +int32_t +ec_gf_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { ec_mkdir(frame, this, -1, EC_MINIMUM_MIN, default_mkdir_cbk, NULL, loc, mode, umask, xdata); @@ -1028,8 +1019,9 @@ int32_t ec_gf_mkdir(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_mknod(call_frame_t * frame, xlator_t * this, loc_t * loc, - mode_t mode, dev_t rdev, mode_t umask, dict_t * xdata) +int32_t +ec_gf_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { ec_mknod(frame, this, -1, EC_MINIMUM_MIN, default_mknod_cbk, NULL, loc, mode, rdev, umask, xdata); @@ -1037,17 +1029,19 @@ int32_t ec_gf_mknod(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_open(call_frame_t * frame, xlator_t * this, loc_t * loc, - int32_t flags, fd_t * fd, dict_t * xdata) +int32_t +ec_gf_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - ec_open(frame, this, -1, EC_MINIMUM_MIN, default_open_cbk, NULL, loc, - flags, fd, xdata); + ec_open(frame, this, -1, EC_MINIMUM_MIN, default_open_cbk, NULL, loc, flags, + fd, xdata); return 0; } -int32_t ec_gf_opendir(call_frame_t * frame, xlator_t * this, loc_t * loc, - fd_t * fd, dict_t * xdata) +int32_t +ec_gf_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { ec_opendir(frame, this, -1, EC_MINIMUM_MIN, default_opendir_cbk, NULL, loc, fd, xdata); @@ -1055,8 +1049,9 @@ int32_t ec_gf_opendir(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_readdir(call_frame_t * frame, xlator_t * this, fd_t * fd, - size_t size, off_t offset, dict_t * xdata) +int32_t +ec_gf_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) { ec_readdir(frame, this, -1, EC_MINIMUM_ONE, default_readdir_cbk, NULL, fd, size, offset, xdata); @@ -1064,17 +1059,19 @@ int32_t ec_gf_readdir(call_frame_t * frame, xlator_t * this, fd_t * fd, return 0; } -int32_t ec_gf_readdirp(call_frame_t * frame, xlator_t * this, fd_t * fd, - size_t size, off_t offset, dict_t * xdata) +int32_t +ec_gf_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) { - ec_readdirp(frame, this, -1, EC_MINIMUM_ONE, default_readdirp_cbk, NULL, - fd, size, offset, xdata); + ec_readdirp(frame, this, -1, EC_MINIMUM_ONE, default_readdirp_cbk, NULL, fd, + size, offset, xdata); return 0; } -int32_t ec_gf_readlink(call_frame_t * frame, xlator_t * this, loc_t * loc, - size_t size, dict_t * xdata) +int32_t +ec_gf_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { 
ec_readlink(frame, this, -1, EC_MINIMUM_ONE, default_readlink_cbk, NULL, loc, size, xdata); @@ -1082,60 +1079,63 @@ int32_t ec_gf_readlink(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_readv(call_frame_t * frame, xlator_t * this, fd_t * fd, - size_t size, off_t offset, uint32_t flags, dict_t * xdata) +int32_t +ec_gf_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - ec_readv(frame, this, -1, EC_MINIMUM_MIN, default_readv_cbk, NULL, fd, - size, offset, flags, xdata); + ec_readv(frame, this, -1, EC_MINIMUM_MIN, default_readv_cbk, NULL, fd, size, + offset, flags, xdata); return 0; } int32_t -ec_gf_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +ec_gf_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int error = 0; + int error = 0; - EC_INTERNAL_XATTR_OR_GOTO (name, xdata, error, out); + EC_INTERNAL_XATTR_OR_GOTO(name, xdata, error, out); - ec_removexattr (frame, this, -1, EC_MINIMUM_MIN, - default_removexattr_cbk, NULL, loc, name, xdata); + ec_removexattr(frame, this, -1, EC_MINIMUM_MIN, default_removexattr_cbk, + NULL, loc, name, xdata); - return 0; + return 0; out: - STACK_UNWIND_STRICT (removexattr, frame, -1, error, NULL); - return 0; + STACK_UNWIND_STRICT(removexattr, frame, -1, error, NULL); + return 0; } int32_t -ec_gf_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +ec_gf_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - int error = 0; + int error = 0; - EC_INTERNAL_XATTR_OR_GOTO (name, xdata, error, out); + EC_INTERNAL_XATTR_OR_GOTO(name, xdata, error, out); - ec_fremovexattr (frame, this, -1, EC_MINIMUM_MIN, - default_fremovexattr_cbk, NULL, fd, name, xdata); + ec_fremovexattr(frame, this, -1, EC_MINIMUM_MIN, default_fremovexattr_cbk, + NULL, fd, name, xdata); - return 0; + return 0; out: - STACK_UNWIND_STRICT (fremovexattr, frame, -1, error, NULL); - return 0; + STACK_UNWIND_STRICT(fremovexattr, frame, -1, error, NULL); + return 0; } -int32_t ec_gf_rename(call_frame_t * frame, xlator_t * this, loc_t * oldloc, - loc_t * newloc, dict_t * xdata) +int32_t +ec_gf_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - ec_rename(frame, this, -1, EC_MINIMUM_MIN, default_rename_cbk, NULL, - oldloc, newloc, xdata); + ec_rename(frame, this, -1, EC_MINIMUM_MIN, default_rename_cbk, NULL, oldloc, + newloc, xdata); return 0; } -int32_t ec_gf_rmdir(call_frame_t * frame, xlator_t * this, loc_t * loc, - int xflags, dict_t * xdata) +int32_t +ec_gf_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) { ec_rmdir(frame, this, -1, EC_MINIMUM_MIN, default_rmdir_cbk, NULL, loc, xflags, xdata); @@ -1143,8 +1143,9 @@ int32_t ec_gf_rmdir(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_setattr(call_frame_t * frame, xlator_t * this, loc_t * loc, - struct iatt * stbuf, int32_t valid, dict_t * xdata) +int32_t +ec_gf_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { ec_setattr(frame, this, -1, EC_MINIMUM_MIN, default_setattr_cbk, NULL, loc, stbuf, valid, xdata); @@ -1152,51 +1153,52 @@ int32_t ec_gf_setattr(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_fsetattr(call_frame_t * frame, xlator_t * this, fd_t * fd, - struct iatt * 
stbuf, int32_t valid, dict_t * xdata) +int32_t +ec_gf_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - ec_fsetattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetattr_cbk, NULL, - fd, stbuf, valid, xdata); + ec_fsetattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetattr_cbk, NULL, fd, + stbuf, valid, xdata); return 0; } int32_t -ec_gf_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *dict, int32_t flags, dict_t *xdata) +ec_gf_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - int error = 0; + int error = 0; - EC_INTERNAL_XATTR_OR_GOTO ("", dict, error, out); + EC_INTERNAL_XATTR_OR_GOTO("", dict, error, out); - ec_setxattr (frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk, - NULL, loc, dict, flags, xdata); + ec_setxattr(frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk, NULL, + loc, dict, flags, xdata); - return 0; + return 0; out: - STACK_UNWIND_STRICT (setxattr, frame, -1, error, NULL); - return 0; + STACK_UNWIND_STRICT(setxattr, frame, -1, error, NULL); + return 0; } int32_t -ec_gf_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int32_t flags, dict_t *xdata) +ec_gf_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - int error = 0; + int error = 0; - EC_INTERNAL_XATTR_OR_GOTO ("", dict, error, out); + EC_INTERNAL_XATTR_OR_GOTO("", dict, error, out); - ec_fsetxattr (frame, this, -1, EC_MINIMUM_MIN, default_fsetxattr_cbk, - NULL, fd, dict, flags, xdata); + ec_fsetxattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetxattr_cbk, NULL, + fd, dict, flags, xdata); - return 0; + return 0; out: - STACK_UNWIND_STRICT (fsetxattr, frame, -1, error, NULL); - return 0; + STACK_UNWIND_STRICT(fsetxattr, frame, -1, error, NULL); + return 0; } -int32_t ec_gf_stat(call_frame_t * frame, xlator_t * this, loc_t * loc, - dict_t * xdata) +int32_t +ec_gf_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ec_stat(frame, this, -1, EC_MINIMUM_MIN, default_stat_cbk, NULL, loc, xdata); @@ -1204,8 +1206,8 @@ int32_t ec_gf_stat(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_fstat(call_frame_t * frame, xlator_t * this, fd_t * fd, - dict_t * xdata) +int32_t +ec_gf_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ec_fstat(frame, this, -1, EC_MINIMUM_MIN, default_fstat_cbk, NULL, fd, xdata); @@ -1213,8 +1215,8 @@ int32_t ec_gf_fstat(call_frame_t * frame, xlator_t * this, fd_t * fd, return 0; } -int32_t ec_gf_statfs(call_frame_t * frame, xlator_t * this, loc_t * loc, - dict_t * xdata) +int32_t +ec_gf_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ec_statfs(frame, this, -1, EC_MINIMUM_MIN, default_statfs_cbk, NULL, loc, xdata); @@ -1222,9 +1224,9 @@ int32_t ec_gf_statfs(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_symlink(call_frame_t * frame, xlator_t * this, - const char * linkname, loc_t * loc, mode_t umask, - dict_t * xdata) +int32_t +ec_gf_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) { ec_symlink(frame, this, -1, EC_MINIMUM_MIN, default_symlink_cbk, NULL, linkname, loc, umask, xdata); @@ -1232,8 +1234,9 @@ int32_t ec_gf_symlink(call_frame_t * frame, xlator_t * this, return 0; } -int32_t ec_gf_truncate(call_frame_t * frame, xlator_t * this, loc_t * loc, - off_t offset, dict_t * xdata) +int32_t 
+ec_gf_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { ec_truncate(frame, this, -1, EC_MINIMUM_MIN, default_truncate_cbk, NULL, loc, offset, xdata); @@ -1241,8 +1244,9 @@ int32_t ec_gf_truncate(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_ftruncate(call_frame_t * frame, xlator_t * this, fd_t * fd, - off_t offset, dict_t * xdata) +int32_t +ec_gf_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { ec_ftruncate(frame, this, -1, EC_MINIMUM_MIN, default_ftruncate_cbk, NULL, fd, offset, xdata); @@ -1250,8 +1254,9 @@ int32_t ec_gf_ftruncate(call_frame_t * frame, xlator_t * this, fd_t * fd, return 0; } -int32_t ec_gf_unlink(call_frame_t * frame, xlator_t * this, loc_t * loc, - int xflags, dict_t * xdata) +int32_t +ec_gf_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) { ec_unlink(frame, this, -1, EC_MINIMUM_MIN, default_unlink_cbk, NULL, loc, xflags, xdata); @@ -1259,9 +1264,10 @@ int32_t ec_gf_unlink(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_writev(call_frame_t * frame, xlator_t * this, fd_t * fd, - struct iovec * vector, int32_t count, off_t offset, - uint32_t flags, struct iobref * iobref, dict_t * xdata) +int32_t +ec_gf_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { ec_writev(frame, this, -1, EC_MINIMUM_MIN, default_writev_cbk, NULL, fd, vector, count, offset, flags, iobref, xdata); @@ -1269,9 +1275,9 @@ int32_t ec_gf_writev(call_frame_t * frame, xlator_t * this, fd_t * fd, return 0; } -int32_t ec_gf_xattrop(call_frame_t * frame, xlator_t * this, loc_t * loc, - gf_xattrop_flags_t optype, dict_t * xattr, - dict_t * xdata) +int32_t +ec_gf_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { ec_xattrop(frame, this, -1, EC_MINIMUM_MIN, default_xattrop_cbk, NULL, loc, optype, xattr, xdata); @@ -1279,47 +1285,49 @@ int32_t ec_gf_xattrop(call_frame_t * frame, xlator_t * this, loc_t * loc, return 0; } -int32_t ec_gf_fxattrop(call_frame_t * frame, xlator_t * this, fd_t * fd, - gf_xattrop_flags_t optype, dict_t * xattr, - dict_t * xdata) +int32_t +ec_gf_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - ec_fxattrop(frame, this, -1, EC_MINIMUM_MIN, default_fxattrop_cbk, NULL, - fd, optype, xattr, xdata); + ec_fxattrop(frame, this, -1, EC_MINIMUM_MIN, default_fxattrop_cbk, NULL, fd, + optype, xattr, xdata); return 0; } -int32_t ec_gf_zerofill(call_frame_t * frame, xlator_t * this, fd_t * fd, - off_t offset, off_t len, dict_t * xdata) +int32_t +ec_gf_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { default_zerofill_failure_cbk(frame, ENOTSUP); return 0; } -int32_t ec_gf_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - gf_seek_what_t what, dict_t *xdata) +int32_t +ec_gf_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { - ec_seek(frame, this, -1, EC_MINIMUM_ONE, default_seek_cbk, NULL, fd, - offset, what, xdata); + ec_seek(frame, this, -1, EC_MINIMUM_ONE, default_seek_cbk, NULL, fd, offset, + what, xdata); return 0; } -int32_t ec_gf_ipc(call_frame_t *frame, xlator_t *this, int32_t op, - dict_t *xdata) +int32_t +ec_gf_ipc(call_frame_t *frame, xlator_t 
*this, int32_t op, dict_t *xdata) { ec_ipc(frame, this, -1, EC_MINIMUM_MIN, default_ipc_cbk, NULL, op, xdata); return 0; } -int32_t ec_gf_forget(xlator_t * this, inode_t * inode) +int32_t +ec_gf_forget(xlator_t *this, inode_t *inode) { uint64_t value = 0; - ec_inode_t * ctx = NULL; + ec_inode_t *ctx = NULL; - if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) - { + if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { ctx = (ec_inode_t *)(uintptr_t)value; /* We can only forget an inode if it has been unlocked, so the stripe * cache should also be empty. */ @@ -1330,38 +1338,41 @@ int32_t ec_gf_forget(xlator_t * this, inode_t * inode) return 0; } -void ec_gf_release_fd(xlator_t * this, fd_t * fd) +void +ec_gf_release_fd(xlator_t *this, fd_t *fd) { uint64_t value = 0; - ec_fd_t * ctx = NULL; + ec_fd_t *ctx = NULL; - if ((fd_ctx_del(fd, this, &value) == 0) && (value != 0)) - { + if ((fd_ctx_del(fd, this, &value) == 0) && (value != 0)) { ctx = (ec_fd_t *)(uintptr_t)value; loc_wipe(&ctx->loc); GF_FREE(ctx); } } -int32_t ec_gf_release(xlator_t * this, fd_t * fd) +int32_t +ec_gf_release(xlator_t *this, fd_t *fd) { ec_gf_release_fd(this, fd); return 0; } -int32_t ec_gf_releasedir(xlator_t * this, fd_t * fd) +int32_t +ec_gf_releasedir(xlator_t *this, fd_t *fd) { ec_gf_release_fd(this, fd); return 0; } -int32_t ec_dump_private(xlator_t *this) +int32_t +ec_dump_private(xlator_t *this) { ec_t *ec = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char tmp[65]; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char tmp[65]; GF_ASSERT(this); @@ -1380,7 +1391,7 @@ int32_t ec_dump_private(xlator_t *this) ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes)); gf_proc_dump_write("background-heals", "%d", ec->background_heals); gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen); - gf_proc_dump_write("self-heal-window-size", "%"PRIu32, + gf_proc_dump_write("self-heal-window-size", "%" PRIu32, ec->self_heal_window_size); gf_proc_dump_write("healers", "%d", ec->healers); gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters); @@ -1409,77 +1420,66 @@ int32_t ec_dump_private(xlator_t *this) return 0; } -struct xlator_fops fops = -{ - .lookup = ec_gf_lookup, - .stat = ec_gf_stat, - .fstat = ec_gf_fstat, - .truncate = ec_gf_truncate, - .ftruncate = ec_gf_ftruncate, - .access = ec_gf_access, - .readlink = ec_gf_readlink, - .mknod = ec_gf_mknod, - .mkdir = ec_gf_mkdir, - .unlink = ec_gf_unlink, - .rmdir = ec_gf_rmdir, - .symlink = ec_gf_symlink, - .rename = ec_gf_rename, - .link = ec_gf_link, - .create = ec_gf_create, - .open = ec_gf_open, - .readv = ec_gf_readv, - .writev = ec_gf_writev, - .flush = ec_gf_flush, - .fsync = ec_gf_fsync, - .opendir = ec_gf_opendir, - .readdir = ec_gf_readdir, - .readdirp = ec_gf_readdirp, - .fsyncdir = ec_gf_fsyncdir, - .statfs = ec_gf_statfs, - .setxattr = ec_gf_setxattr, - .getxattr = ec_gf_getxattr, - .fsetxattr = ec_gf_fsetxattr, - .fgetxattr = ec_gf_fgetxattr, - .removexattr = ec_gf_removexattr, - .fremovexattr = ec_gf_fremovexattr, - .lk = ec_gf_lk, - .inodelk = ec_gf_inodelk, - .finodelk = ec_gf_finodelk, - .entrylk = ec_gf_entrylk, - .fentrylk = ec_gf_fentrylk, - .xattrop = ec_gf_xattrop, - .fxattrop = ec_gf_fxattrop, - .setattr = ec_gf_setattr, - .fsetattr = ec_gf_fsetattr, - .fallocate = ec_gf_fallocate, - .discard = ec_gf_discard, - .zerofill = ec_gf_zerofill, - .seek = ec_gf_seek, - .ipc = ec_gf_ipc -}; - -struct xlator_cbks cbks = -{ - .forget = ec_gf_forget, - .release = ec_gf_release, - .releasedir = ec_gf_releasedir -}; - -struct 
xlator_dumpops dumpops = { - .priv = ec_dump_private -}; - -struct volume_options options[] = -{ - { - .key = { "redundancy" }, - .type = GF_OPTION_TYPE_INT, - .default_value = "{{ volume.redundancy }}", - .description = "Maximum number of bricks that can fail " - "simultaneously without losing data." - }, +struct xlator_fops fops = {.lookup = ec_gf_lookup, + .stat = ec_gf_stat, + .fstat = ec_gf_fstat, + .truncate = ec_gf_truncate, + .ftruncate = ec_gf_ftruncate, + .access = ec_gf_access, + .readlink = ec_gf_readlink, + .mknod = ec_gf_mknod, + .mkdir = ec_gf_mkdir, + .unlink = ec_gf_unlink, + .rmdir = ec_gf_rmdir, + .symlink = ec_gf_symlink, + .rename = ec_gf_rename, + .link = ec_gf_link, + .create = ec_gf_create, + .open = ec_gf_open, + .readv = ec_gf_readv, + .writev = ec_gf_writev, + .flush = ec_gf_flush, + .fsync = ec_gf_fsync, + .opendir = ec_gf_opendir, + .readdir = ec_gf_readdir, + .readdirp = ec_gf_readdirp, + .fsyncdir = ec_gf_fsyncdir, + .statfs = ec_gf_statfs, + .setxattr = ec_gf_setxattr, + .getxattr = ec_gf_getxattr, + .fsetxattr = ec_gf_fsetxattr, + .fgetxattr = ec_gf_fgetxattr, + .removexattr = ec_gf_removexattr, + .fremovexattr = ec_gf_fremovexattr, + .lk = ec_gf_lk, + .inodelk = ec_gf_inodelk, + .finodelk = ec_gf_finodelk, + .entrylk = ec_gf_entrylk, + .fentrylk = ec_gf_fentrylk, + .xattrop = ec_gf_xattrop, + .fxattrop = ec_gf_fxattrop, + .setattr = ec_gf_setattr, + .fsetattr = ec_gf_fsetattr, + .fallocate = ec_gf_fallocate, + .discard = ec_gf_discard, + .zerofill = ec_gf_zerofill, + .seek = ec_gf_seek, + .ipc = ec_gf_ipc}; + +struct xlator_cbks cbks = {.forget = ec_gf_forget, + .release = ec_gf_release, + .releasedir = ec_gf_releasedir}; + +struct xlator_dumpops dumpops = {.priv = ec_dump_private}; + +struct volume_options options[] = { + {.key = {"redundancy"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "{{ volume.redundancy }}", + .description = "Maximum number of bricks that can fail " + "simultaneously without losing data."}, { - .key = { "self-heal-daemon" }, + .key = {"self-heal-daemon"}, .type = GF_OPTION_TYPE_BOOL, .description = "self-heal daemon enable/disable", .default_value = "enable", @@ -1487,193 +1487,183 @@ struct volume_options options[] = .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, .tags = {"disperse"}, }, - { .key = {"iam-self-heal-daemon"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option differentiates if the disperse " - "translator is running as part of self-heal-daemon " - "or not." - }, - { .key = {"eager-lock"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {GD_OP_VERSION_3_7_10}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = {"disperse"}, - .description = "Enable/Disable eager lock for regular files on a " - "disperse volume. If a fop takes a lock and completes " - "its operation, it waits for next 1 second before " - "releasing the lock, to see if the lock can be reused " - "for next fop from the same client. If ec finds any lock " - "contention within 1 second it releases the lock " - "immediately before time expires. This improves the " - "performance of file operations. However, as it takes " - "lock on first brick, for few operations like read, " - "discovery of lock contention might take long time and " - "can actually degrade the performance. If eager lock is " - "disabled, lock will be released as soon as fop " - "completes." 
- }, - { .key = {"other-eager-lock"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = { GD_OP_VERSION_3_13_0 }, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = { "disperse" }, - .description = "It's equivalent to the eager-lock option but for non " - "regular files." - }, - { .key = {"eager-lock-timeout"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 60, - .default_value = "1", - .op_version = { GD_OP_VERSION_4_0_0 }, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = { "disperse", "locks", "timeout" }, - .description = "Maximum time (in seconds) that a lock on an inode is " - "kept held if no new operations on the inode are " - "received." - }, - { .key = {"other-eager-lock-timeout"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 60, - .default_value = "1", - .op_version = { GD_OP_VERSION_4_0_0 }, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = { "disperse", "locks", "timeout" }, - .description = "It's equivalent to eager-lock-timeout option but for " - "non regular files." - }, - { .key = {"background-heals"}, - .type = GF_OPTION_TYPE_INT, - .min = 0,/*Disabling background heals*/ - .max = 256, - .default_value = "8", - .op_version = {GD_OP_VERSION_3_7_3}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = {"disperse"}, - .description = "This option can be used to control number of parallel" - " heals", - }, - { .key = {"heal-wait-qlength"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = 65536, /*Around 100MB as of now with sizeof(ec_fop_data_t) at 1800*/ - .default_value = "128", - .op_version = {GD_OP_VERSION_3_7_3}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = {"disperse"}, - .description = "This option can be used to control number of heals" - " that can wait", - }, - { .key = {"heal-timeout"}, - .type = GF_OPTION_TYPE_INT, - .min = 60, - .max = INT_MAX, - .default_value = "600", - .op_version = {GD_OP_VERSION_3_7_3}, - .flags = OPT_FLAG_SETTABLE, - .tags = {"disperse"}, - .description = "time interval for checking the need to self-heal " - "in self-heal-daemon" - }, - { .key = {"read-policy" }, - .type = GF_OPTION_TYPE_STR, - .value = {"round-robin", "gfid-hash"}, - .default_value = "gfid-hash", - .op_version = {GD_OP_VERSION_3_7_6}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = {"disperse"}, - .description = "inode-read fops happen only on 'k' number of bricks in" - " n=k+m disperse subvolume. 'round-robin' selects the read" - " subvolume using round-robin algo. 'gfid-hash' selects read" - " subvolume based on hash of the gfid of that file/directory.", - }, - { .key = {"shd-max-threads"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 64, - .default_value = "1", - .op_version = {GD_OP_VERSION_3_9_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"disperse"}, - .description = "Maximum number of parallel heals SHD can do per local " - "brick. This can substantially lower heal times, " - "but can also crush your bricks if you don't have " - "the storage hardware to support this." 
+ {.key = {"iam-self-heal-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option differentiates if the disperse " + "translator is running as part of self-heal-daemon " + "or not."}, + {.key = {"eager-lock"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {GD_OP_VERSION_3_7_10}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "Enable/Disable eager lock for regular files on a " + "disperse volume. If a fop takes a lock and completes " + "its operation, it waits for next 1 second before " + "releasing the lock, to see if the lock can be reused " + "for next fop from the same client. If ec finds any lock " + "contention within 1 second it releases the lock " + "immediately before time expires. This improves the " + "performance of file operations. However, as it takes " + "lock on first brick, for few operations like read, " + "discovery of lock contention might take long time and " + "can actually degrade the performance. If eager lock is " + "disabled, lock will be released as soon as fop " + "completes."}, + {.key = {"other-eager-lock"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "It's equivalent to the eager-lock option but for non " + "regular files."}, + {.key = {"eager-lock-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 60, + .default_value = "1", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse", "locks", "timeout"}, + .description = "Maximum time (in seconds) that a lock on an inode is " + "kept held if no new operations on the inode are " + "received."}, + {.key = {"other-eager-lock-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 60, + .default_value = "1", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse", "locks", "timeout"}, + .description = "It's equivalent to eager-lock-timeout option but for " + "non regular files."}, + { + .key = {"background-heals"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, /*Disabling background heals*/ + .max = 256, + .default_value = "8", + .op_version = {GD_OP_VERSION_3_7_3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "This option can be used to control number of parallel" + " heals", }, - { .key = {"shd-wait-qlength"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 65536, - .default_value = "1024", - .op_version = {GD_OP_VERSION_3_9_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, - .tags = {"disperse"}, - .description = "This option can be used to control number of heals" - " that can wait in SHD per subvolume" + { + .key = {"heal-wait-qlength"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = + 65536, /*Around 100MB as of now with sizeof(ec_fop_data_t) at 1800*/ + .default_value = "128", + .op_version = {GD_OP_VERSION_3_7_3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "This option can be used to control number of heals" + " that can wait", }, + {.key = {"heal-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 60, + .max = INT_MAX, + .default_value = "600", + .op_version = {GD_OP_VERSION_3_7_3}, + .flags = OPT_FLAG_SETTABLE, + .tags = {"disperse"}, + 
.description = "time interval for checking the need to self-heal " + "in self-heal-daemon"}, { - .key = { "cpu-extensions" }, + .key = {"read-policy"}, .type = GF_OPTION_TYPE_STR, - .value = { "none", "auto", "x64", "sse", "avx" }, - .default_value = "auto", - .op_version = {GD_OP_VERSION_3_9_0}, + .value = {"round-robin", "gfid-hash"}, + .default_value = "gfid-hash", + .op_version = {GD_OP_VERSION_3_7_6}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, .tags = {"disperse"}, - .description = "force the cpu extensions to be used to accelerate the " - "galois field computations." - }, - { .key = {"self-heal-window-size"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 1024, - .default_value = "1", - .op_version = {GD_OP_VERSION_3_11_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, - .tags = {"disperse"}, - .description = "Maximum number blocks(128KB) per file for which " - "self-heal process would be applied simultaneously." - }, - { .key = {"optimistic-change-log"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .op_version = {GD_OP_VERSION_3_10_1}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT, - .tags = {"disperse"}, - .description = "Set/Unset dirty flag for every update fop at the start" - "of the fop. If OFF, this option impacts performance of" - "entry operations or metadata operations as it will" - "set dirty flag at the start and unset it at the end of" - "ALL update fop. If ON and all the bricks are good," - "dirty flag will be set at the start only for file fops" - "For metadata and entry fops dirty flag will not be set" - "at the start, if all the bricks are good. This does" - "not impact performance for metadata operations and" - "entry operation but has a very small window to miss" - "marking entry as dirty in case it is required to be" - "healed" - }, - { .key = {"parallel-writes"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .description = "This controls if writes can be wound in parallel as long" - "as it doesn't modify same stripes" - }, - { .key = {"stripe-cache"}, - .type = GF_OPTION_TYPE_INT, - .min = 0,/*Disabling stripe_cache*/ - .max = EC_STRIPE_CACHE_MAX_SIZE, - .default_value = "4", - .description = "This option will keep the last stripe of write fop" - "in memory. If next write falls in this stripe, we need" - "not to read it again from backend and we can save READ" - "fop going over the network. This will improve performance," - "specially for sequential writes. However, this will also" - "lead to extra memory consumption, maximum " - "(cache size * stripe size) Bytes per open file." + .description = + "inode-read fops happen only on 'k' number of bricks in" + " n=k+m disperse subvolume. 'round-robin' selects the read" + " subvolume using round-robin algo. 'gfid-hash' selects read" + " subvolume based on hash of the gfid of that file/directory.", }, - { .key = {NULL} } -}; + {.key = {"shd-max-threads"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 64, + .default_value = "1", + .op_version = {GD_OP_VERSION_3_9_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "Maximum number of parallel heals SHD can do per local " + "brick. 
This can substantially lower heal times, " + "but can also crush your bricks if you don't have " + "the storage hardware to support this."}, + {.key = {"shd-wait-qlength"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 65536, + .default_value = "1024", + .op_version = {GD_OP_VERSION_3_9_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "This option can be used to control number of heals" + " that can wait in SHD per subvolume"}, + {.key = {"cpu-extensions"}, + .type = GF_OPTION_TYPE_STR, + .value = {"none", "auto", "x64", "sse", "avx"}, + .default_value = "auto", + .op_version = {GD_OP_VERSION_3_9_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "force the cpu extensions to be used to accelerate the " + "galois field computations."}, + {.key = {"self-heal-window-size"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 1024, + .default_value = "1", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse"}, + .description = "Maximum number blocks(128KB) per file for which " + "self-heal process would be applied simultaneously."}, + {.key = {"optimistic-change-log"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {GD_OP_VERSION_3_10_1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT, + .tags = {"disperse"}, + .description = "Set/Unset dirty flag for every update fop at the start" + "of the fop. If OFF, this option impacts performance of" + "entry operations or metadata operations as it will" + "set dirty flag at the start and unset it at the end of" + "ALL update fop. If ON and all the bricks are good," + "dirty flag will be set at the start only for file fops" + "For metadata and entry fops dirty flag will not be set" + "at the start, if all the bricks are good. This does" + "not impact performance for metadata operations and" + "entry operation but has a very small window to miss" + "marking entry as dirty in case it is required to be" + "healed"}, + {.key = {"parallel-writes"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "This controls if writes can be wound in parallel as long" + "as it doesn't modify same stripes"}, + {.key = {"stripe-cache"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, /*Disabling stripe_cache*/ + .max = EC_STRIPE_CACHE_MAX_SIZE, + .default_value = "4", + .description = "This option will keep the last stripe of write fop" + "in memory. If next write falls in this stripe, we need" + "not to read it again from backend and we can save READ" + "fop going over the network. This will improve performance," + "specially for sequential writes. 
However, this will also" + "lead to extra memory consumption, maximum " + "(cache size * stripe size) Bytes per open file."}, + {.key = {NULL}}}; diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c index 59dfdfad979..7301494415d 100644 --- a/xlators/cluster/stripe/src/stripe-helpers.c +++ b/xlators/cluster/stripe/src/stripe-helpers.c @@ -16,664 +16,643 @@ #include "logging.h" void -stripe_local_wipe (stripe_local_t *local) +stripe_local_wipe(stripe_local_t *local) { - if (!local) - goto out; + if (!local) + goto out; - loc_wipe (&local->loc); - loc_wipe (&local->loc2); + loc_wipe(&local->loc); + loc_wipe(&local->loc2); - if (local->fd) - fd_unref (local->fd); + if (local->fd) + fd_unref(local->fd); - if (local->inode) - inode_unref (local->inode); + if (local->inode) + inode_unref(local->inode); - if (local->xattr) - dict_unref (local->xattr); + if (local->xattr) + dict_unref(local->xattr); - if (local->xdata) - dict_unref (local->xdata); + if (local->xdata) + dict_unref(local->xdata); out: - return; + return; } - - int -stripe_aggregate (dict_t *this, char *key, data_t *value, void *data) +stripe_aggregate(dict_t *this, char *key, data_t *value, void *data) { - dict_t *dst = NULL; - int64_t *ptr = 0, *size = NULL; - int32_t ret = -1; - - dst = data; - - if (strcmp (key, QUOTA_SIZE_KEY) == 0) { - ret = dict_get_bin (dst, key, (void **)&size); - if (ret < 0) { - size = GF_CALLOC (1, sizeof (int64_t), - gf_common_mt_char); - if (size == NULL) { - gf_log ("stripe", GF_LOG_WARNING, - "memory allocation failed"); - goto out; - } - ret = dict_set_bin (dst, key, size, sizeof (int64_t)); - if (ret < 0) { - gf_log ("stripe", GF_LOG_WARNING, - "stripe aggregate dict set failed"); - GF_FREE (size); - goto out; - } - } - - ptr = data_to_bin (value); - if (ptr == NULL) { - gf_log ("stripe", GF_LOG_WARNING, "data to bin failed"); - goto out; - } - - *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr)); - } else if (strcmp (key, GF_CONTENT_KEY)) { - /* No need to aggregate 'CONTENT' data */ - ret = dict_set (dst, key, value); - if (ret) - gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed"); + dict_t *dst = NULL; + int64_t *ptr = 0, *size = NULL; + int32_t ret = -1; + + dst = data; + + if (strcmp(key, QUOTA_SIZE_KEY) == 0) { + ret = dict_get_bin(dst, key, (void **)&size); + if (ret < 0) { + size = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char); + if (size == NULL) { + gf_log("stripe", GF_LOG_WARNING, "memory allocation failed"); + goto out; + } + ret = dict_set_bin(dst, key, size, sizeof(int64_t)); + if (ret < 0) { + gf_log("stripe", GF_LOG_WARNING, + "stripe aggregate dict set failed"); + GF_FREE(size); + goto out; + } } + ptr = data_to_bin(value); + if (ptr == NULL) { + gf_log("stripe", GF_LOG_WARNING, "data to bin failed"); + goto out; + } + + *size = hton64(ntoh64(*size) + ntoh64(*ptr)); + } else if (strcmp(key, GF_CONTENT_KEY)) { + /* No need to aggregate 'CONTENT' data */ + ret = dict_set(dst, key, value); + if (ret) + gf_log("stripe", GF_LOG_WARNING, "xattr dict set failed"); + } + out: - return 0; + return 0; } - void -stripe_aggregate_xattr (dict_t *dst, dict_t *src) +stripe_aggregate_xattr(dict_t *dst, dict_t *src) { - if ((dst == NULL) || (src == NULL)) { - goto out; - } + if ((dst == NULL) || (src == NULL)) { + goto out; + } - dict_foreach (src, stripe_aggregate, dst); + dict_foreach(src, stripe_aggregate, dst); out: - return; + return; } - int32_t -stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total) 
+stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total) { - int32_t i = 0; - int32_t ret = -1; - int32_t len = 0; - char *sbuf = NULL; - stripe_xattr_sort_t *xattr = NULL; + int32_t i = 0; + int32_t ret = -1; + int32_t len = 0; + char *sbuf = NULL; + stripe_xattr_sort_t *xattr = NULL; - if (!buffer || !local || !local->xattr_list) - goto out; + if (!buffer || !local || !local->xattr_list) + goto out; - sbuf = buffer; + sbuf = buffer; - for (i = 0; i < local->nallocs; i++) { - xattr = local->xattr_list + i; - len = xattr->xattr_len - 1; /* length includes \0 */ + for (i = 0; i < local->nallocs; i++) { + xattr = local->xattr_list + i; + len = xattr->xattr_len - 1; /* length includes \0 */ - if (len && xattr && xattr->xattr_value) { - memcpy (buffer, xattr->xattr_value, len); - buffer += len; - *buffer++ = ' '; - } + if (len && xattr && xattr->xattr_value) { + memcpy(buffer, xattr->xattr_value, len); + buffer += len; + *buffer++ = ' '; } + } - *--buffer = '\0'; - if (total) - *total = buffer - sbuf; - ret = 0; + *--buffer = '\0'; + if (total) + *total = buffer - sbuf; + ret = 0; - out: - return ret; +out: + return ret; } int32_t -stripe_free_xattr_str (stripe_local_t *local) +stripe_free_xattr_str(stripe_local_t *local) { - int32_t i = 0; - int32_t ret = -1; - stripe_xattr_sort_t *xattr = NULL; + int32_t i = 0; + int32_t ret = -1; + stripe_xattr_sort_t *xattr = NULL; - if (!local || !local->xattr_list) - goto out; + if (!local || !local->xattr_list) + goto out; - for (i = 0; i < local->nallocs; i++) { - xattr = local->xattr_list + i; + for (i = 0; i < local->nallocs; i++) { + xattr = local->xattr_list + i; - if (xattr && xattr->xattr_value) - GF_FREE (xattr->xattr_value); - } + if (xattr && xattr->xattr_value) + GF_FREE(xattr->xattr_value); + } - ret = 0; - out: - return ret; + ret = 0; +out: + return ret; } - int32_t -stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local, - void **xattr_serz) +stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local, + void **xattr_serz) { - int32_t ret = -1, i = 0, len = 0; - dict_t *tmp1 = NULL, *tmp2 = NULL; - char *buf = NULL; - stripe_xattr_sort_t *xattr = NULL; - - if (xattr_serz == NULL) { + int32_t ret = -1, i = 0, len = 0; + dict_t *tmp1 = NULL, *tmp2 = NULL; + char *buf = NULL; + stripe_xattr_sort_t *xattr = NULL; + + if (xattr_serz == NULL) { + goto out; + } + + tmp2 = dict_new(); + + if (tmp2 == NULL) { + goto out; + } + + for (i = 0; i < local->nallocs; i++) { + xattr = local->xattr_list + i; + len = xattr->xattr_len; + + if (len && xattr && xattr->xattr_value) { + ret = dict_reset(tmp2); + if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, "dict_reset failed (%s)", + strerror(-ret)); + } + + ret = dict_unserialize(xattr->xattr_value, xattr->xattr_len, &tmp2); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "dict_unserialize failed (%s)", strerror(-ret)); + ret = -1; goto out; - } - - tmp2 = dict_new (); + } - if (tmp2 == NULL) { + tmp1 = dict_copy(tmp2, tmp1); + if (tmp1 == NULL) { + gf_log(this->name, GF_LOG_WARNING, "dict_copy failed (%s)", + strerror(-ret)); + ret = -1; goto out; + } } - - for (i = 0; i < local->nallocs; i++) { - xattr = local->xattr_list + i; - len = xattr->xattr_len; - - if (len && xattr && xattr->xattr_value) { - ret = dict_reset (tmp2); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "dict_reset failed (%s)", - strerror (-ret)); - } - - ret = dict_unserialize (xattr->xattr_value, - xattr->xattr_len, - &tmp2); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - 
"dict_unserialize failed (%s)", - strerror (-ret)); - ret = -1; - goto out; - } - - tmp1 = dict_copy (tmp2, tmp1); - if (tmp1 == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "dict_copy failed (%s)", - strerror (-ret)); - ret = -1; - goto out; - } - } + } + + len = dict_serialized_length(tmp1); + if (len > 0) { + buf = GF_CALLOC(1, len, gf_common_mt_dict_t); + if (buf == NULL) { + ret = -1; + goto out; } - len = dict_serialized_length (tmp1); - if (len > 0) { - buf = GF_CALLOC (1, len, gf_common_mt_dict_t); - if (buf == NULL) { - ret = -1; - goto out; - } - - ret = dict_serialize (tmp1, buf); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "dict_serialize failed (%s)", strerror (-ret)); - GF_FREE(buf); - ret = -1; - goto out; - } - - *xattr_serz = buf; + ret = dict_serialize(tmp1, buf); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "dict_serialize failed (%s)", + strerror(-ret)); + GF_FREE(buf); + ret = -1; + goto out; } - ret = 0; + *xattr_serz = buf; + } + + ret = 0; out: - if (tmp1 != NULL) { - dict_unref (tmp1); - } + if (tmp1 != NULL) { + dict_unref(tmp1); + } - if (tmp2 != NULL) { - dict_unref (tmp2); - } + if (tmp2 != NULL) { + dict_unref(tmp2); + } - return ret; + return ret; } - int32_t -stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local, - char **xattr_serz) +stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local, + char **xattr_serz) { - int ret = -1; - int32_t padding = 0; - int32_t tlen = 0; - int len = 0; - char stripe_size_str[20] = {0,}; - char *pathinfo_serz = NULL; - - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref"); - goto out; - } - - len = snprintf (stripe_size_str, sizeof (stripe_size_str), "%"PRId64, - (long long) (local->fctx) ? local->fctx->stripe_size : 0); - if (len < 0 || len >= sizeof (stripe_size_str)) - goto out; - /* extra bytes for decorations (brackets and <>'s) */ - padding = strlen (this->name) + SLEN (STRIPE_PATHINFO_HEADER) - + len + 7; - local->xattr_total_len += (padding + 2); - - pathinfo_serz = GF_MALLOC (local->xattr_total_len, - gf_common_mt_char); - if (!pathinfo_serz) - goto out; - - /* xlator info */ - (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ", - this->name, stripe_size_str); - - ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Cannot aggregate pathinfo list"); - GF_FREE(pathinfo_serz); - goto out; - } - - *(pathinfo_serz + padding + tlen) = ')'; - *(pathinfo_serz + padding + tlen + 1) = '\0'; - - *xattr_serz = pathinfo_serz; - - ret = 0; - out: - return ret; + int ret = -1; + int32_t padding = 0; + int32_t tlen = 0; + int len = 0; + char stripe_size_str[20] = { + 0, + }; + char *pathinfo_serz = NULL; + + if (!local) { + gf_log(this->name, GF_LOG_ERROR, "Possible NULL deref"); + goto out; + } + + len = snprintf(stripe_size_str, sizeof(stripe_size_str), "%" PRId64, + (long long)(local->fctx) ? 
local->fctx->stripe_size : 0); + if (len < 0 || len >= sizeof(stripe_size_str)) + goto out; + /* extra bytes for decorations (brackets and <>'s) */ + padding = strlen(this->name) + SLEN(STRIPE_PATHINFO_HEADER) + len + 7; + local->xattr_total_len += (padding + 2); + + pathinfo_serz = GF_MALLOC(local->xattr_total_len, gf_common_mt_char); + if (!pathinfo_serz) + goto out; + + /* xlator info */ + (void)sprintf(pathinfo_serz, "(<" STRIPE_PATHINFO_HEADER "%s:[%s]> ", + this->name, stripe_size_str); + + ret = stripe_xattr_aggregate(pathinfo_serz + padding, local, &tlen); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list"); + GF_FREE(pathinfo_serz); + goto out; + } + + *(pathinfo_serz + padding + tlen) = ')'; + *(pathinfo_serz + padding + tlen + 1) = '\0'; + + *xattr_serz = pathinfo_serz; + + ret = 0; +out: + return ret; } /** * stripe_get_matching_bs - Get the matching block size for the given path. */ int32_t -stripe_get_matching_bs (const char *path, stripe_private_t *priv) +stripe_get_matching_bs(const char *path, stripe_private_t *priv) { - struct stripe_options *trav = NULL; - uint64_t block_size = 0; - - GF_VALIDATE_OR_GOTO ("stripe", priv, out); - GF_VALIDATE_OR_GOTO ("stripe", path, out); - - LOCK (&priv->lock); - { - block_size = priv->block_size; - trav = priv->pattern; - while (trav) { - if (!fnmatch (trav->path_pattern, path, FNM_NOESCAPE)) { - block_size = trav->block_size; - break; - } - trav = trav->next; - } + struct stripe_options *trav = NULL; + uint64_t block_size = 0; + + GF_VALIDATE_OR_GOTO("stripe", priv, out); + GF_VALIDATE_OR_GOTO("stripe", path, out); + + LOCK(&priv->lock); + { + block_size = priv->block_size; + trav = priv->pattern; + while (trav) { + if (!fnmatch(trav->path_pattern, path, FNM_NOESCAPE)) { + block_size = trav->block_size; + break; + } + trav = trav->next; } - UNLOCK (&priv->lock); + } + UNLOCK(&priv->lock); out: - return block_size; + return block_size; } int32_t -stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local, - dict_t *dict) +stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local, + dict_t *dict) { - char key[256] = {0,}; - data_t *data = NULL; - int32_t index = 0; - stripe_private_t *priv = NULL; - - priv = this->private; - - + char key[256] = { + 0, + }; + data_t *data = NULL; + int32_t index = 0; + stripe_private_t *priv = NULL; + + priv = this->private; + + if (!local->fctx) { + local->fctx = GF_CALLOC(1, sizeof(stripe_fd_ctx_t), + gf_stripe_mt_stripe_fd_ctx_t); if (!local->fctx) { - local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), - gf_stripe_mt_stripe_fd_ctx_t); - if (!local->fctx) { - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - - local->fctx->static_array = 0; + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; } - /* Stripe block size */ - sprintf (key, "trusted.%s.stripe-size", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - gf_log (this->name, GF_LOG_ERROR, - "Failed to get stripe-size"); - goto out; - } else { - if (!local->fctx->stripe_size) { - local->fctx->stripe_size = - data_to_int64 (data); - } - - if (local->fctx->stripe_size != data_to_int64 (data)) { - gf_log (this->name, GF_LOG_WARNING, - "stripe-size mismatch in blocks"); - local->xattr_self_heal_needed = 1; - } - } - - /* Stripe count */ - sprintf (key, "trusted.%s.stripe-count", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - gf_log (this->name, GF_LOG_ERROR, - 
"Failed to get stripe-count"); - goto out; - } - if (!local->fctx->xl_array) { - local->fctx->stripe_count = data_to_int32 (data); - if (!local->fctx->stripe_count) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-count xattr"); - local->op_ret = -1; - local->op_errno = EIO; - goto out; - } - - local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count, - sizeof (xlator_t *), - gf_stripe_mt_xlator_t); - - if (!local->fctx->xl_array) { - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - if (local->fctx->stripe_count != data_to_int32 (data)) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-count xattr (%d != %d)", - local->fctx->stripe_count, data_to_int32 (data)); - local->op_ret = -1; - local->op_errno = EIO; - goto out; + local->fctx->static_array = 0; + } + /* Stripe block size */ + sprintf(key, "trusted.%s.stripe-size", this->name); + data = dict_get(dict, key); + if (!data) { + local->xattr_self_heal_needed = 1; + gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-size"); + goto out; + } else { + if (!local->fctx->stripe_size) { + local->fctx->stripe_size = data_to_int64(data); } - /* index */ - sprintf (key, "trusted.%s.stripe-index", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - gf_log (this->name, GF_LOG_ERROR, - "Failed to get stripe-index"); - goto out; - } - index = data_to_int32 (data); - if (index > priv->child_count) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-index xattr (%d)", index); - local->op_ret = -1; - local->op_errno = EIO; - goto out; + if (local->fctx->stripe_size != data_to_int64(data)) { + gf_log(this->name, GF_LOG_WARNING, + "stripe-size mismatch in blocks"); + local->xattr_self_heal_needed = 1; } - if (local->fctx->xl_array) { - if (!local->fctx->xl_array[index]) - local->fctx->xl_array[index] = prev->this; + } + + /* Stripe count */ + sprintf(key, "trusted.%s.stripe-count", this->name); + data = dict_get(dict, key); + + if (!data) { + local->xattr_self_heal_needed = 1; + gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-count"); + goto out; + } + if (!local->fctx->xl_array) { + local->fctx->stripe_count = data_to_int32(data); + if (!local->fctx->stripe_count) { + gf_log(this->name, GF_LOG_ERROR, "error with stripe-count xattr"); + local->op_ret = -1; + local->op_errno = EIO; + goto out; } - sprintf(key, "trusted.%s.stripe-coalesce", this->name); - data = dict_get(dict, key); - if (!data) { - /* - * The file was probably created prior to coalesce support. - * Assume non-coalesce mode for this file to maintain backwards - * compatibility. 
- */ - gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce " - "attr, assume non-coalesce mode"); - local->fctx->stripe_coalesce = 0; - } else { - local->fctx->stripe_coalesce = data_to_int32(data); - } + local->fctx->xl_array = GF_CALLOC(local->fctx->stripe_count, + sizeof(xlator_t *), + gf_stripe_mt_xlator_t); + if (!local->fctx->xl_array) { + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + } + if (local->fctx->stripe_count != data_to_int32(data)) { + gf_log(this->name, GF_LOG_ERROR, + "error with stripe-count xattr (%d != %d)", + local->fctx->stripe_count, data_to_int32(data)); + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } + + /* index */ + sprintf(key, "trusted.%s.stripe-index", this->name); + data = dict_get(dict, key); + if (!data) { + local->xattr_self_heal_needed = 1; + gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-index"); + goto out; + } + index = data_to_int32(data); + if (index > priv->child_count) { + gf_log(this->name, GF_LOG_ERROR, "error with stripe-index xattr (%d)", + index); + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } + if (local->fctx->xl_array) { + if (!local->fctx->xl_array[index]) + local->fctx->xl_array[index] = prev->this; + } + + sprintf(key, "trusted.%s.stripe-coalesce", this->name); + data = dict_get(dict, key); + if (!data) { + /* + * The file was probably created prior to coalesce support. + * Assume non-coalesce mode for this file to maintain backwards + * compatibility. + */ + gf_log(this->name, GF_LOG_DEBUG, + "missing stripe-coalesce " + "attr, assume non-coalesce mode"); + local->fctx->stripe_coalesce = 0; + } else { + local->fctx->stripe_coalesce = data_to_int32(data); + } out: - return 0; + return 0; } int32_t -stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size, - uint32_t stripe_count, uint32_t stripe_index, - uint32_t stripe_coalesce) +stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size, + uint32_t stripe_count, uint32_t stripe_index, + uint32_t stripe_coalesce) { - char key[256] = {0,}; - int32_t ret = -1; - - sprintf (key, "trusted.%s.stripe-size", this->name); - ret = dict_set_int64 (dict, key, stripe_size); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set %s in xattr_req dict", key); - goto out; - } - - sprintf (key, "trusted.%s.stripe-count", this->name); - ret = dict_set_int32 (dict, key, stripe_count); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set %s in xattr_req dict", key); - goto out; - } - - sprintf (key, "trusted.%s.stripe-index", this->name); - ret = dict_set_int32 (dict, key, stripe_index); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set %s in xattr_req dict", key); - goto out; - } - - sprintf(key, "trusted.%s.stripe-coalesce", this->name); - ret = dict_set_int32(dict, key, stripe_coalesce); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, - "failed to set %s in xattr_req_dict", key); - goto out; - } + char key[256] = { + 0, + }; + int32_t ret = -1; + + sprintf(key, "trusted.%s.stripe-size", this->name); + ret = dict_set_int64(dict, key, stripe_size); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict", + key); + goto out; + } + + sprintf(key, "trusted.%s.stripe-count", this->name); + ret = dict_set_int32(dict, key, stripe_count); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict", + key); + goto out; + } + + sprintf(key, "trusted.%s.stripe-index", this->name); + ret = 
dict_set_int32(dict, key, stripe_index); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict", + key); + goto out; + } + + sprintf(key, "trusted.%s.stripe-coalesce", this->name); + ret = dict_set_int32(dict, key, stripe_coalesce); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req_dict", + key); + goto out; + } out: - return ret; + return ret; } - static int -set_default_block_size (stripe_private_t *priv, char *num) +set_default_block_size(stripe_private_t *priv, char *num) { + int ret = -1; + GF_VALIDATE_OR_GOTO("stripe", THIS, out); + GF_VALIDATE_OR_GOTO(THIS->name, priv, out); + GF_VALIDATE_OR_GOTO(THIS->name, num, out); + + if (gf_string2bytesize_uint64(num, &priv->block_size) != 0) { + gf_log(THIS->name, GF_LOG_ERROR, "invalid number format \"%s\"", num); + goto out; + } - int ret = -1; - GF_VALIDATE_OR_GOTO ("stripe", THIS, out); - GF_VALIDATE_OR_GOTO (THIS->name, priv, out); - GF_VALIDATE_OR_GOTO (THIS->name, num, out); + ret = 0; +out: + return ret; +} - if (gf_string2bytesize_uint64 (num, &priv->block_size) != 0) { - gf_log (THIS->name, GF_LOG_ERROR, - "invalid number format \"%s\"", num); +int +set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data) +{ + int ret = -1; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *dup_str = NULL; + char *stripe_str = NULL; + char *pattern = NULL; + char *num = NULL; + struct stripe_options *temp_stripeopt = NULL; + struct stripe_options *stripe_opt = NULL; + + if (!this || !priv || !data) + goto out; + + /* Get the pattern for striping. + "option block-size *avi:10MB" etc */ + stripe_str = strtok_r(data, ",", &tmp_str); + while (stripe_str) { + dup_str = gf_strdup(stripe_str); + stripe_opt = GF_CALLOC(1, sizeof(struct stripe_options), + gf_stripe_mt_stripe_options); + if (!stripe_opt) { + goto out; + } + + pattern = strtok_r(dup_str, ":", &tmp_str1); + num = strtok_r(NULL, ":", &tmp_str1); + if (!num) { + num = pattern; + pattern = "*"; + ret = set_default_block_size(priv, num); + if (ret) goto out; } + if (gf_string2bytesize_uint64(num, &stripe_opt->block_size) != 0) { + gf_log(this->name, GF_LOG_ERROR, "invalid number format \"%s\"", + num); + goto out; + } - ret = 0; + if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) { + gf_log(this->name, GF_LOG_ERROR, + "Invalid Block-size: " + "%s. Should be at least %llu bytes", + num, STRIPE_MIN_BLOCK_SIZE); + goto out; + } + if (stripe_opt->block_size % 512) { + gf_log(this->name, GF_LOG_ERROR, + "Block-size: %s should" + " be a multiple of 512 bytes", + num); + goto out; + } - out: - return ret; + memcpy(stripe_opt->path_pattern, pattern, strlen(pattern)); -} + gf_log(this->name, GF_LOG_DEBUG, + "block-size : pattern %s : size %" PRId64, + stripe_opt->path_pattern, stripe_opt->block_size); + if (priv->pattern) + temp_stripeopt = NULL; + else + temp_stripeopt = priv->pattern; -int -set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data) -{ - int ret = -1; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *dup_str = NULL; - char *stripe_str = NULL; - char *pattern = NULL; - char *num = NULL; - struct stripe_options *temp_stripeopt = NULL; - struct stripe_options *stripe_opt = NULL; - - if (!this || !priv || !data) - goto out; + stripe_opt->next = temp_stripeopt; - /* Get the pattern for striping. 
- "option block-size *avi:10MB" etc */ - stripe_str = strtok_r (data, ",", &tmp_str); - while (stripe_str) { - dup_str = gf_strdup (stripe_str); - stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options), - gf_stripe_mt_stripe_options); - if (!stripe_opt) { - goto out; - } - - pattern = strtok_r (dup_str, ":", &tmp_str1); - num = strtok_r (NULL, ":", &tmp_str1); - if (!num) { - num = pattern; - pattern = "*"; - ret = set_default_block_size (priv, num); - if (ret) - goto out; - } - if (gf_string2bytesize_uint64 (num, &stripe_opt->block_size) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", num); - goto out; - } - - if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) { - gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: " - "%s. Should be at least %llu bytes", num, - STRIPE_MIN_BLOCK_SIZE); - goto out; - } - if (stripe_opt->block_size % 512) { - gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should" - " be a multiple of 512 bytes", num); - goto out; - } - - memcpy (stripe_opt->path_pattern, pattern, strlen (pattern)); - - gf_log (this->name, GF_LOG_DEBUG, - "block-size : pattern %s : size %"PRId64, - stripe_opt->path_pattern, stripe_opt->block_size); - - if (priv->pattern) - temp_stripeopt = NULL; - else - temp_stripeopt = priv->pattern; - - stripe_opt->next = temp_stripeopt; - - priv->pattern = stripe_opt; - stripe_opt = NULL; - - GF_FREE (dup_str); - dup_str = NULL; - - stripe_str = strtok_r (NULL, ",", &tmp_str); - } + priv->pattern = stripe_opt; + stripe_opt = NULL; + + GF_FREE(dup_str); + dup_str = NULL; + + stripe_str = strtok_r(NULL, ",", &tmp_str); + } - ret = 0; + ret = 0; out: - GF_FREE (dup_str); + GF_FREE(dup_str); - GF_FREE (stripe_opt); + GF_FREE(stripe_opt); - return ret; + return ret; } int32_t -stripe_iatt_merge (struct iatt *from, struct iatt *to) +stripe_iatt_merge(struct iatt *from, struct iatt *to) { - if (to->ia_size < from->ia_size) - to->ia_size = from->ia_size; - if (to->ia_mtime < from->ia_mtime) - to->ia_mtime = from->ia_mtime; - if (to->ia_ctime < from->ia_ctime) - to->ia_ctime = from->ia_ctime; - if (to->ia_atime < from->ia_atime) - to->ia_atime = from->ia_atime; - return 0; + if (to->ia_size < from->ia_size) + to->ia_size = from->ia_size; + if (to->ia_mtime < from->ia_mtime) + to->ia_mtime = from->ia_mtime; + if (to->ia_ctime < from->ia_ctime) + to->ia_ctime = from->ia_ctime; + if (to->ia_atime < from->ia_atime) + to->ia_atime = from->ia_atime; + return 0; } off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count) { - size_t line_size = 0; - uint64_t stripe_num = 0; - off_t coalesced_offset = 0; + size_t line_size = 0; + uint64_t stripe_num = 0; + off_t coalesced_offset = 0; - line_size = stripe_size * stripe_count; - stripe_num = offset / line_size; + line_size = stripe_size * stripe_count; + stripe_num = offset / line_size; - coalesced_offset = (stripe_num * stripe_size) + - (offset % stripe_size); + coalesced_offset = (stripe_num * stripe_size) + (offset % stripe_size); - return coalesced_offset; + return coalesced_offset; } off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count, - int stripe_index) + int stripe_index) { - uint64_t nr_full_stripe_chunks = 0, mod = 0; - - if (!size) - return size; - - /* - * Estimate the number of fully written stripes from the - * local file size. Each stripe_size chunk corresponds to - * a stripe. 
- */ - nr_full_stripe_chunks = (size / stripe_size) * stripe_count; - mod = size % stripe_size; - - if (!mod) { - /* - * There is no remainder, thus we could have overestimated - * the size of the file in terms of chunks. Trim the number - * of chunks by the following stripe members and leave it - * up to those nodes to respond with a larger size (if - * necessary). - */ - nr_full_stripe_chunks -= stripe_count - - (stripe_index + 1); - size = nr_full_stripe_chunks * stripe_size; - } else { - /* - * There is a remainder and thus we own the last chunk of the - * file. Add the preceding stripe members of the final stripe - * along with the remainder to calculate the exact size. - */ - nr_full_stripe_chunks += stripe_index; - size = nr_full_stripe_chunks * stripe_size + mod; - } + uint64_t nr_full_stripe_chunks = 0, mod = 0; + if (!size) return size; + + /* + * Estimate the number of fully written stripes from the + * local file size. Each stripe_size chunk corresponds to + * a stripe. + */ + nr_full_stripe_chunks = (size / stripe_size) * stripe_count; + mod = size % stripe_size; + + if (!mod) { + /* + * There is no remainder, thus we could have overestimated + * the size of the file in terms of chunks. Trim the number + * of chunks by the following stripe members and leave it + * up to those nodes to respond with a larger size (if + * necessary). + */ + nr_full_stripe_chunks -= stripe_count - (stripe_index + 1); + size = nr_full_stripe_chunks * stripe_size; + } else { + /* + * There is a remainder and thus we own the last chunk of the + * file. Add the preceding stripe members of the final stripe + * along with the remainder to calculate the exact size. + */ + nr_full_stripe_chunks += stripe_index; + size = nr_full_stripe_chunks * stripe_size + mod; + } + + return size; } diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index 75851aa6c5d..a6027d1f281 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -32,2291 +32,2217 @@ struct volume_options options[]; int32_t -stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, dict_t *xdata) +stripe_sh_chown_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preop, + struct iatt *postop, dict_t *xdata) { - int callcnt = -1; - stripe_local_t *local = NULL; + int callcnt = -1; + stripe_local_t *local = NULL; - if (!this || !frame || !frame->local) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK(&frame->lock); - if (!callcnt) { - STRIPE_STACK_DESTROY (frame); - } + if (!callcnt) { + STRIPE_STACK_DESTROY(frame); + } out: - return 0; + return 0; } int32_t -stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_sh_make_entry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - 
stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!frame || !frame->local || !cookie || !this) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!frame || !frame->local || !cookie || !this) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; + prev = cookie; + local = frame->local; - STACK_WIND (frame, stripe_sh_chown_cbk, prev->this, - prev->this->fops->setattr, &local->loc, - &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL); + STACK_WIND(frame, stripe_sh_chown_cbk, prev->this, + prev->this->fops->setattr, &local->loc, &local->stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL); out: - return 0; + return 0; } int32_t -stripe_entry_self_heal (call_frame_t *frame, xlator_t *this, - stripe_local_t *local) +stripe_entry_self_heal(call_frame_t *frame, xlator_t *this, + stripe_local_t *local) { - xlator_list_t *trav = NULL; - call_frame_t *rframe = NULL; - stripe_local_t *rlocal = NULL; - stripe_private_t *priv = NULL; - dict_t *xdata = NULL; - int ret = 0; - - if (!local || !this || !frame) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - if (!(IA_ISREG (local->stbuf.ia_type) || - IA_ISDIR (local->stbuf.ia_type))) - return 0; - - priv = this->private; - trav = this->children; - rframe = copy_frame (frame); - if (!rframe) { - goto out; - } - rlocal = mem_get0 (this->local_pool); - if (!rlocal) { - goto out; - } - rframe->local = rlocal; - rlocal->call_count = priv->child_count; - loc_copy (&rlocal->loc, &local->loc); - memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt)); + xlator_list_t *trav = NULL; + call_frame_t *rframe = NULL; + stripe_local_t *rlocal = NULL; + stripe_private_t *priv = NULL; + dict_t *xdata = NULL; + int ret = 0; + + if (!local || !this || !frame) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - xdata = dict_new (); - if (!xdata) - goto out; + if (!(IA_ISREG(local->stbuf.ia_type) || IA_ISDIR(local->stbuf.ia_type))) + return 0; - ret = dict_set_gfuuid (xdata, "gfid-req", local->stbuf.ia_gfid, true); - if (ret) - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set gfid-req", local->loc.path); + priv = this->private; + trav = this->children; + rframe = copy_frame(frame); + if (!rframe) { + goto out; + } + rlocal = mem_get0(this->local_pool); + if (!rlocal) { + goto out; + } + rframe->local = rlocal; + rlocal->call_count = priv->child_count; + loc_copy(&rlocal->loc, &local->loc); + memcpy(&rlocal->stbuf, &local->stbuf, sizeof(struct iatt)); + + xdata = dict_new(); + if (!xdata) + goto out; - while (trav) { - if (IA_ISREG (local->stbuf.ia_type)) { - STACK_WIND (rframe, stripe_sh_make_entry_cbk, - trav->xlator, trav->xlator->fops->mknod, - &local->loc, - st_mode_from_ia (local->stbuf.ia_prot, - local->stbuf.ia_type), - 0, 0, xdata); - } - if (IA_ISDIR (local->stbuf.ia_type)) { - STACK_WIND (rframe, stripe_sh_make_entry_cbk, - trav->xlator, trav->xlator->fops->mkdir, - &local->loc, - st_mode_from_ia (local->stbuf.ia_prot, - local->stbuf.ia_type), - 0, xdata); - } - trav = trav->next; + ret = dict_set_gfuuid(xdata, "gfid-req", local->stbuf.ia_gfid, true); + if (ret) + gf_log(this->name, GF_LOG_WARNING, "%s: failed to set gfid-req", + local->loc.path); + + while (trav) { + if (IA_ISREG(local->stbuf.ia_type)) { + STACK_WIND( + rframe, stripe_sh_make_entry_cbk, trav->xlator, + trav->xlator->fops->mknod, &local->loc, + 
st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0, + 0, xdata); + } + if (IA_ISDIR(local->stbuf.ia_type)) { + STACK_WIND( + rframe, stripe_sh_make_entry_cbk, trav->xlator, + trav->xlator->fops->mkdir, &local->loc, + st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0, + xdata); } + trav = trav->next; + } - if (xdata) - dict_unref (xdata); - return 0; + if (xdata) + dict_unref(xdata); + return 0; out: - if (rframe) - STRIPE_STACK_DESTROY (rframe); + if (rframe) + STRIPE_STACK_DESTROY(rframe); - return 0; + return 0; } - int32_t -stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +stripe_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - int ret = 0; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = 0; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; + prev = cookie; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - if ((op_errno != ENOENT) && (op_errno != ESTALE)) - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, - strerror (op_errno)); - if (local->op_errno != ESTALE) - local->op_errno = op_errno; - if (((op_errno != ENOENT) && (op_errno != ENOTCONN) - && (op_errno != ESTALE)) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - if (op_errno == ENOENT) - local->entry_self_heal_needed = 1; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; - if (op_ret >= 0) { - local->op_ret = 0; - if (IA_ISREG (buf->ia_type)) { - ret = stripe_ctx_handle (this, prev, local, - xdata); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Error getting fctx info from" - " dict"); - } - - if (FIRST_CHILD(this) == prev->this) { - local->stbuf = *buf; - local->postparent = *postparent; - local->inode = inode_ref (inode); - if (xdata) - local->xdata = dict_ref (xdata); - if (local->xattr) { - stripe_aggregate_xattr (local->xdata, - local->xattr); - dict_unref (local->xattr); - local->xattr = NULL; - } - } - - if (!local->xdata && !local->xattr) { - local->xattr = dict_ref (xdata); - } else if (local->xdata) { - stripe_aggregate_xattr (local->xdata, xdata); - } else if (local->xattr) { - stripe_aggregate_xattr (local->xattr, xdata); - } - - local->stbuf_blocks += buf->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - - if (gf_uuid_is_null (local->ia_gfid)) - gf_uuid_copy (local->ia_gfid, buf->ia_gfid); - - /* Make sure the gfid on all the nodes are same */ - if (gf_uuid_compare (local->ia_gfid, buf->ia_gfid)) { - gf_log (this->name, GF_LOG_WARNING, - "%s: gfid different on subvolume %s", - local->loc.path, prev->this->name); - } - } + if (op_ret == -1) { + if ((op_errno != ENOENT) && (op_errno != ESTALE)) + 
gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + if (local->op_errno != ESTALE) + local->op_errno = op_errno; + if (((op_errno != ENOENT) && (op_errno != ENOTCONN) && + (op_errno != ESTALE)) || + (prev->this == FIRST_CHILD(this))) + local->failed = 1; + if (op_errno == ENOENT) + local->entry_self_heal_needed = 1; } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->op_ret == 0 && local->entry_self_heal_needed && - !gf_uuid_is_null (local->loc.inode->gfid)) - stripe_entry_self_heal (frame, this, local); - - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->stbuf.ia_blocks = local->stbuf_blocks; - local->stbuf.ia_size = local->stbuf_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - inode_ctx_put (local->inode, this, - (uint64_t) (long)local->fctx); + + if (op_ret >= 0) { + local->op_ret = 0; + if (IA_ISREG(buf->ia_type)) { + ret = stripe_ctx_handle(this, prev, local, xdata); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from" + " dict"); + } + + if (FIRST_CHILD(this) == prev->this) { + local->stbuf = *buf; + local->postparent = *postparent; + local->inode = inode_ref(inode); + if (xdata) + local->xdata = dict_ref(xdata); + if (local->xattr) { + stripe_aggregate_xattr(local->xdata, local->xattr); + dict_unref(local->xattr); + local->xattr = NULL; } + } - STRIPE_STACK_UNWIND (lookup, frame, local->op_ret, - local->op_errno, local->inode, - &local->stbuf, local->xdata, - &local->postparent); - } -out: - return 0; -} + if (!local->xdata && !local->xattr) { + local->xattr = dict_ref(xdata); + } else if (local->xdata) { + stripe_aggregate_xattr(local->xdata, xdata); + } else if (local->xattr) { + stripe_aggregate_xattr(local->xattr, xdata); + } -int32_t -stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - int64_t filesize = 0; - int ret = 0; - uint64_t tmpctx = 0; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; + local->stbuf_blocks += buf->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - loc_copy (&local->loc, loc); + correct_file_size(buf, local->fctx, prev); - inode_ctx_get (local->inode, this, &tmpctx); - if (tmpctx) - local->fctx = (stripe_fd_ctx_t*) (long)tmpctx; + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; - /* quick-read friendly changes */ - if (xdata && dict_get (xdata, GF_CONTENT_KEY)) { - ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize); - if (!ret && (filesize > priv->block_size)) - dict_del (xdata, GF_CONTENT_KEY); + if (gf_uuid_is_null(local->ia_gfid)) + gf_uuid_copy(local->ia_gfid, buf->ia_gfid); + + /* Make sure the gfid on all the nodes are same */ + if (gf_uuid_compare(local->ia_gfid, buf->ia_gfid)) { + gf_log(this->name, GF_LOG_WARNING, + "%s: gfid different on subvolume %s", local->loc.path, + prev->this->name); + } } + } + UNLOCK(&frame->lock); - /* get stripe-size xattr on lookup. 
This would be required for - * open/read/write/pathinfo calls. Hence we send down the request - * even when type == IA_INVAL */ - - /* - * We aren't guaranteed to have xdata here. We need the format info for - * the file, so allocate xdata if necessary. - */ - if (!xdata) - xdata = dict_new(); - else - xdata = dict_ref(xdata); - - if (xdata && (IA_ISREG (loc->inode->ia_type) || - (loc->inode->ia_type == IA_INVAL))) { - ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0); - if (ret) - gf_log (this->name , GF_LOG_ERROR, "Failed to build" - " xattr request for %s", loc->path); + if (!callcnt) { + if (local->op_ret == 0 && local->entry_self_heal_needed && + !gf_uuid_is_null(local->loc.inode->gfid)) + stripe_entry_self_heal(frame, this, local); - } + if (local->failed) + local->op_ret = -1; - /* Every time in stripe lookup, all child nodes - should be looked up */ - local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_lookup_cbk, trav->xlator, - trav->xlator->fops->lookup, loc, xdata); - trav = trav->next; + if (local->op_ret != -1) { + local->stbuf.ia_blocks = local->stbuf_blocks; + local->stbuf.ia_size = local->stbuf_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx); } - dict_unref(xdata); + STRIPE_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xdata, + &local->postparent); + } +out: + return 0; +} - return 0; +int32_t +stripe_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + int64_t filesize = 0; + int ret = 0; + uint64_t tmpctx = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + loc_copy(&local->loc, loc); + + inode_ctx_get(local->inode, this, &tmpctx); + if (tmpctx) + local->fctx = (stripe_fd_ctx_t *)(long)tmpctx; + + /* quick-read friendly changes */ + if (xdata && dict_get(xdata, GF_CONTENT_KEY)) { + ret = dict_get_int64(xdata, GF_CONTENT_KEY, &filesize); + if (!ret && (filesize > priv->block_size)) + dict_del(xdata, GF_CONTENT_KEY); + } + + /* get stripe-size xattr on lookup. This would be required for + * open/read/write/pathinfo calls. Hence we send down the request + * even when type == IA_INVAL */ + + /* + * We aren't guaranteed to have xdata here. We need the format info for + * the file, so allocate xdata if necessary. 
+ */ + if (!xdata) + xdata = dict_new(); + else + xdata = dict_ref(xdata); + + if (xdata && + (IA_ISREG(loc->inode->ia_type) || (loc->inode->ia_type == IA_INVAL))) { + ret = stripe_xattr_request_build(this, xdata, 8, 4, 4, 0); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Failed to build" + " xattr request for %s", + loc->path); + } + + /* Every time in stripe lookup, all child nodes + should be looked up */ + local->call_count = priv->child_count; + while (trav) { + STACK_WIND(frame, stripe_lookup_cbk, trav->xlator, + trav->xlator->fops->lookup, loc, xdata); + trav = trav->next; + } + + dict_unref(xdata); + + return 0; err: - STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; } - int32_t -stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) +stripe_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - prev = cookie; - local = frame->local; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; + } - if (op_ret == 0) { - local->op_ret = 0; + if (op_ret == 0) { + local->op_ret = 0; - if (FIRST_CHILD(this) == prev->this) { - local->stbuf = *buf; - } + if (FIRST_CHILD(this) == prev->this) { + local->stbuf = *buf; + } - local->stbuf_blocks += buf->ia_blocks; + local->stbuf_blocks += buf->ia_blocks; - correct_file_size(buf, local->fctx, prev); + correct_file_size(buf, local->fctx, prev); - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - } + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - STRIPE_STACK_UNWIND (stat, frame, local->op_ret, - local->op_errno, &local->stbuf, NULL); + if (local->op_ret != -1) { + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; } + + STRIPE_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, + &local->stbuf, NULL); + } out: - return 0; + return 0; } int32_t -stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +stripe_stat(call_frame_t *frame, xlator_t *this, 
loc_t *loc, dict_t *xdata) { - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - if (IA_ISREG(loc->inode->ia_type)) { - inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *)&fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } - while (trav) { - STACK_WIND (frame, stripe_stat_cbk, trav->xlator, - trav->xlator->fops->stat, loc, NULL); - trav = trav->next; - } + while (trav) { + STACK_WIND(frame, stripe_stat_cbk, trav->xlator, + trav->xlator->fops->stat, loc, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL); + return 0; } - int32_t -stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata) +stripe_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, + dict_t *xdata) { - stripe_local_t *local = NULL; - int32_t callcnt = 0; - - if (!this || !frame || !frame->local) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret && (op_errno != ENOTCONN)) { - local->op_errno = op_errno; - } - if (op_ret == 0) { - struct statvfs *dict_buf = &local->statvfs_buf; - dict_buf->f_bsize = stbuf->f_bsize; - dict_buf->f_frsize = stbuf->f_frsize; - dict_buf->f_blocks += stbuf->f_blocks; - dict_buf->f_bfree += stbuf->f_bfree; - dict_buf->f_bavail += stbuf->f_bavail; - dict_buf->f_files += stbuf->f_files; - dict_buf->f_ffree += stbuf->f_ffree; - dict_buf->f_favail += stbuf->f_favail; - dict_buf->f_fsid = stbuf->f_fsid; - dict_buf->f_flag = stbuf->f_flag; - dict_buf->f_namemax = stbuf->f_namemax; - local->op_ret = 0; - } - } - UNLOCK (&frame->lock); + stripe_local_t *local = NULL; + int32_t callcnt = 0; - if (!callcnt) { - STRIPE_STACK_UNWIND (statfs, frame, local->op_ret, - local->op_errno, &local->statvfs_buf, NULL); - } + if (!this || !frame || !frame->local) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + 
goto out; + } + local = frame->local; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret && (op_errno != ENOTCONN)) { + local->op_errno = op_errno; + } + if (op_ret == 0) { + struct statvfs *dict_buf = &local->statvfs_buf; + dict_buf->f_bsize = stbuf->f_bsize; + dict_buf->f_frsize = stbuf->f_frsize; + dict_buf->f_blocks += stbuf->f_blocks; + dict_buf->f_bfree += stbuf->f_bfree; + dict_buf->f_bavail += stbuf->f_bavail; + dict_buf->f_files += stbuf->f_files; + dict_buf->f_ffree += stbuf->f_ffree; + dict_buf->f_favail += stbuf->f_favail; + dict_buf->f_fsid = stbuf->f_fsid; + dict_buf->f_flag = stbuf->f_flag; + dict_buf->f_namemax = stbuf->f_namemax; + local->op_ret = 0; + } + } + UNLOCK(&frame->lock); + + if (!callcnt) { + STRIPE_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, + &local->statvfs_buf, NULL); + } out: - return 0; + return 0; } int32_t -stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +stripe_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - trav = this->children; - priv = this->private; - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->op_errno = ENOTCONN; - frame->local = local; - - local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_statfs_cbk, trav->xlator, - trav->xlator->fops->statfs, loc, NULL); - trav = trav->next; - } + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + trav = this->children; + priv = this->private; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + local->op_errno = ENOTCONN; + frame->local = local; + + local->call_count = priv->child_count; + while (trav) { + STACK_WIND(frame, stripe_statfs_cbk, trav->xlator, + trav->xlator->fops->statfs, loc, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); + return 0; } - - int32_t -stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +stripe_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; + prev = cookie; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, 
strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; + } - if (op_ret == 0) { - local->op_ret = 0; - if (FIRST_CHILD(this) == prev->this) { - local->pre_buf = *prebuf; - local->post_buf = *postbuf; - } + if (op_ret == 0) { + local->op_ret = 0; + if (FIRST_CHILD(this) == prev->this) { + local->pre_buf = *prebuf; + local->post_buf = *postbuf; + } - local->prebuf_blocks += prebuf->ia_blocks; - local->postbuf_blocks += postbuf->ia_blocks; + local->prebuf_blocks += prebuf->ia_blocks; + local->postbuf_blocks += postbuf->ia_blocks; - correct_file_size(prebuf, local->fctx, prev); - correct_file_size(postbuf, local->fctx, prev); + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); - if (local->prebuf_size < prebuf->ia_size) - local->prebuf_size = prebuf->ia_size; + if (local->prebuf_size < prebuf->ia_size) + local->prebuf_size = prebuf->ia_size; - if (local->postbuf_size < postbuf->ia_size) - local->postbuf_size = postbuf->ia_size; - } + if (local->postbuf_size < postbuf->ia_size) + local->postbuf_size = postbuf->ia_size; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->pre_buf.ia_blocks = local->prebuf_blocks; - local->pre_buf.ia_size = local->prebuf_size; - local->post_buf.ia_blocks = local->postbuf_blocks; - local->post_buf.ia_size = local->postbuf_size; - } + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - STRIPE_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, &local->pre_buf, - &local->post_buf, NULL); + if (local->op_ret != -1) { + local->pre_buf.ia_blocks = local->prebuf_blocks; + local->pre_buf.ia_size = local->prebuf_size; + local->post_buf.ia_blocks = local->postbuf_blocks; + local->post_buf.ia_size = local->postbuf_size; } + + STRIPE_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, + &local->pre_buf, &local->post_buf, NULL); + } out: - return 0; + return 0; } int32_t -stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) +stripe_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = EINVAL; - int i, eof_idx; - off_t dest_offset, tmp_offset; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; + int i, eof_idx; + off_t dest_offset, tmp_offset; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + + if (priv->first_child_down) { + 
op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + inode_ctx_get(loc->inode, this, (uint64_t *)&fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, "no xlator at index %d", i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + /* + * The node that owns EOF is truncated to the exact + * coalesced offset. Nodes prior to this index should + * be rounded up to the size of the complete stripe, + * while nodes after this index should be rounded down + * to the size of the previous stripe. + */ + if (i < eof_idx) + tmp_offset = roof(offset, + fctx->stripe_size * fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, + fctx->stripe_size * fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size, + fctx->stripe_count); + } else { + dest_offset = offset; } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); - if (!fctx) { - gf_log(this->name, GF_LOG_ERROR, "no stripe context"); - op_errno = EINVAL; - goto err; - } - - local->fctx = fctx; - eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; - - for (i = 0; i < fctx->stripe_count; i++) { - if (!fctx->xl_array[i]) { - gf_log(this->name, GF_LOG_ERROR, - "no xlator at index %d", i); - op_errno = EINVAL; - goto err; - } - - if (fctx->stripe_coalesce) { - /* - * The node that owns EOF is truncated to the exact - * coalesced offset. Nodes prior to this index should - * be rounded up to the size of the complete stripe, - * while nodes after this index should be rounded down - * to the size of the previous stripe. 
- */ - if (i < eof_idx) - tmp_offset = roof(offset, fctx->stripe_size * - fctx->stripe_count); - else if (i > eof_idx) - tmp_offset = floor(offset, fctx->stripe_size * - fctx->stripe_count); - else - tmp_offset = offset; - - dest_offset = coalesced_offset(tmp_offset, - fctx->stripe_size, fctx->stripe_count); - } else { - dest_offset = offset; - } - - STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], - fctx->xl_array[i]->fops->truncate, loc, dest_offset, - NULL); - } + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->truncate, loc, dest_offset, NULL); + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - int32_t -stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, dict_t *xdata) +stripe_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preop, + struct iatt *postop, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; + prev = cookie; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; + } - if (op_ret == 0) { - local->op_ret = 0; + if (op_ret == 0) { + local->op_ret = 0; - if (FIRST_CHILD(this) == prev->this) { - local->pre_buf = *preop; - local->post_buf = *postop; - } + if (FIRST_CHILD(this) == prev->this) { + local->pre_buf = *preop; + local->post_buf = *postop; + } - local->prebuf_blocks += preop->ia_blocks; - local->postbuf_blocks += postop->ia_blocks; + local->prebuf_blocks += preop->ia_blocks; + local->postbuf_blocks += postop->ia_blocks; - correct_file_size(preop, local->fctx, prev); - correct_file_size(postop, local->fctx, prev); + correct_file_size(preop, local->fctx, prev); + correct_file_size(postop, local->fctx, prev); - if (local->prebuf_size < preop->ia_size) - local->prebuf_size = preop->ia_size; - if (local->postbuf_size < postop->ia_size) - local->postbuf_size = postop->ia_size; - } + if (local->prebuf_size < preop->ia_size) + local->prebuf_size = preop->ia_size; + if (local->postbuf_size < postop->ia_size) + local->postbuf_size = postop->ia_size; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->pre_buf.ia_blocks = local->prebuf_blocks; - local->pre_buf.ia_size = local->prebuf_size; - local->post_buf.ia_blocks = 
local->postbuf_blocks; - local->post_buf.ia_size = local->postbuf_size; - } + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - STRIPE_STACK_UNWIND (setattr, frame, local->op_ret, - local->op_errno, &local->pre_buf, - &local->post_buf, NULL); + if (local->op_ret != -1) { + local->pre_buf.ia_blocks = local->prebuf_blocks; + local->pre_buf.ia_size = local->prebuf_size; + local->post_buf.ia_blocks = local->postbuf_blocks; + local->post_buf.ia_size = local->postbuf_size; } + + STRIPE_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, + &local->pre_buf, &local->post_buf, NULL); + } out: - return 0; + return 0; } - int32_t -stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +stripe_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - if (!IA_ISDIR (loc->inode->ia_type) && - !IA_ISREG (loc->inode->ia_type)) { - local->call_count = 1; - STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, - loc, stbuf, valid, NULL); - return 0; - } + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + if (!IA_ISDIR(loc->inode->ia_type) && !IA_ISREG(loc->inode->ia_type)) { + local->call_count = 1; + STACK_WIND(frame, stripe_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, NULL); + return 0; + } - if (IA_ISREG(loc->inode->ia_type)) { - inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *)&fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } - local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_setattr_cbk, - trav->xlator, trav->xlator->fops->setattr, - loc, stbuf, valid, NULL); - trav = trav->next; - } + local->call_count = priv->child_count; + while (trav) { + STACK_WIND(frame, stripe_setattr_cbk, trav->xlator, + trav->xlator->fops->setattr, loc, stbuf, valid, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - int32_t -stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct 
iatt *stbuf, int32_t valid, dict_t *xdata) +stripe_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_setattr_cbk, trav->xlator, - trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL); - trav = trav->next; - } + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND(frame, stripe_setattr_cbk, trav->xlator, + trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int32_t -stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +stripe_stack_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; + prev = cookie; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; + } - if (op_ret == 0) { - local->op_ret = 0; + if (op_ret == 0) { + local->op_ret = 0; - local->stbuf.ia_blocks += buf->ia_blocks; - local->preparent.ia_blocks += preoldparent->ia_blocks; - 
local->postparent.ia_blocks += postoldparent->ia_blocks; - local->pre_buf.ia_blocks += prenewparent->ia_blocks; - local->post_buf.ia_blocks += postnewparent->ia_blocks; + local->stbuf.ia_blocks += buf->ia_blocks; + local->preparent.ia_blocks += preoldparent->ia_blocks; + local->postparent.ia_blocks += postoldparent->ia_blocks; + local->pre_buf.ia_blocks += prenewparent->ia_blocks; + local->post_buf.ia_blocks += postnewparent->ia_blocks; - correct_file_size(buf, local->fctx, prev); + correct_file_size(buf, local->fctx, prev); - if (local->stbuf.ia_size < buf->ia_size) - local->stbuf.ia_size = buf->ia_size; + if (local->stbuf.ia_size < buf->ia_size) + local->stbuf.ia_size = buf->ia_size; - if (local->preparent.ia_size < preoldparent->ia_size) - local->preparent.ia_size = preoldparent->ia_size; + if (local->preparent.ia_size < preoldparent->ia_size) + local->preparent.ia_size = preoldparent->ia_size; - if (local->postparent.ia_size < postoldparent->ia_size) - local->postparent.ia_size = postoldparent->ia_size; + if (local->postparent.ia_size < postoldparent->ia_size) + local->postparent.ia_size = postoldparent->ia_size; - if (local->pre_buf.ia_size < prenewparent->ia_size) - local->pre_buf.ia_size = prenewparent->ia_size; + if (local->pre_buf.ia_size < prenewparent->ia_size) + local->pre_buf.ia_size = prenewparent->ia_size; - if (local->post_buf.ia_size < postnewparent->ia_size) - local->post_buf.ia_size = postnewparent->ia_size; - } + if (local->post_buf.ia_size < postnewparent->ia_size) + local->post_buf.ia_size = postnewparent->ia_size; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preparent, - &local->postparent, &local->pre_buf, - &local->post_buf, NULL); - } + STRIPE_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, + &local->stbuf, &local->preparent, + &local->postparent, &local->pre_buf, + &local->post_buf, NULL); + } out: - return 0; + return 0; } int32_t -stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +stripe_first_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + + if (!this || !frame || !frame->local) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + op_errno = EINVAL; + goto unwind; + } + + if (op_ret == -1) { + goto unwind; + } + + local = frame->local; + trav = this->children; + + local->stbuf = *buf; + local->preparent = *preoldparent; + local->postparent = *postoldparent; + local->pre_buf = *prenewparent; + local->post_buf = *postnewparent; + + local->op_ret = 0; + local->call_count--; + + trav = trav->next; /* Skip first child */ + while (trav) { + STACK_WIND(frame, stripe_stack_rename_cbk, trav->xlator, + trav->xlator->fops->rename, &local->loc, &local->loc2, NULL); + trav = trav->next; + } + return 0; - if (!this || !frame || !frame->local) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - 
op_errno = EINVAL; - goto unwind; - } +unwind: + STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, NULL); + return 0; +} - if (op_ret == -1) { - goto unwind; - } +int32_t +stripe_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(oldloc, err); + VALIDATE_OR_GOTO(oldloc->path, err); + VALIDATE_OR_GOTO(oldloc->inode, err); + VALIDATE_OR_GOTO(newloc, err); + + priv = this->private; + trav = this->children; + + /* If any one node is down, don't allow rename */ + if (priv->nodes_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + + local->op_ret = -1; + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + + local->call_count = priv->child_count; + + if (IA_ISREG(oldloc->inode->ia_type)) { + inode_ctx_get(oldloc->inode, this, (uint64_t *)&fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } - local = frame->local; - trav = this->children; + STACK_WIND(frame, stripe_first_rename_cbk, trav->xlator, + trav->xlator->fops->rename, oldloc, newloc, NULL); - local->stbuf = *buf; - local->preparent = *preoldparent; - local->postparent = *postoldparent; - local->pre_buf = *prenewparent; - local->post_buf = *postnewparent; + return 0; +err: + STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + return 0; +} +int32_t +stripe_first_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - local->op_ret = 0; - local->call_count--; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - trav = trav->next; /* Skip first child */ - while (trav) { - STACK_WIND (frame, stripe_stack_rename_cbk, - trav->xlator, trav->xlator->fops->rename, - &local->loc, &local->loc2, NULL); - trav = trav->next; - } - return 0; + prev = cookie; + local = frame->local; -unwind: - STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent, NULL); - return 0; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, + strerror(op_errno)); + goto out; + } + local->op_ret = 0; + local->preparent = *preparent; + local->postparent = *postparent; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); + return 0; +out: + STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); + + return 0; } int32_t -stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +stripe_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t 
op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (oldloc, err); - VALIDATE_OR_GOTO (oldloc->path, err); - VALIDATE_OR_GOTO (oldloc->inode, err); - VALIDATE_OR_GOTO (newloc, err); - - priv = this->private; - trav = this->children; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - /* If any one node is down, don't allow rename */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, + strerror(op_errno)); + local->op_errno = op_errno; + if (op_errno != ENOENT) { + local->failed = 1; + local->op_ret = op_ret; + } } + } + UNLOCK(&frame->lock); - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; + if (callcnt == 1) { + if (local->failed) { + op_errno = local->op_errno; + goto out; } + STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, + local->xdata); + } + return 0; +out: + STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); - frame->local = local; + return 0; +} - local->op_ret = -1; - loc_copy (&local->loc, oldloc); - loc_copy (&local->loc2, newloc); +int32_t +stripe_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Don't unlink a file if a node is down */ + if (priv->nodes_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + loc_copy(&local->loc, loc); + local->xflag = xflag; + + if (xdata) + local->xdata = dict_ref(xdata); + + frame->local = local; + local->call_count = priv->child_count; + trav = trav->next; /* Skip the first child */ + + while (trav) { + STACK_WIND(frame, stripe_unlink_cbk, trav->xlator, + trav->xlator->fops->unlink, loc, xflag, xdata); + trav = trav->next; + } - local->call_count = priv->child_count; + return 0; +err: + STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +stripe_first_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + stripe_local_t *local = NULL; - if (IA_ISREG(oldloc->inode->ia_type)) { - inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } + if (!this || !frame || !frame->local) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + op_errno = EINVAL; + goto err; + } - STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator, - trav->xlator->fops->rename, oldloc, newloc, NULL); + if (op_ret == -1) { + goto err; + } - return 0; + local = frame->local; + local->op_ret = 0; + + 
local->call_count--; /* First child successful */ + + local->preparent = *preparent; + local->postparent = *postparent; + local->preparent_size = preparent->ia_size; + local->postparent_size = postparent->ia_size; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + STRIPE_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); + return 0; err: - STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(rmdir, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; } + int32_t -stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; + prev = cookie; + local = frame->local; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", - prev->this->name, strerror (op_errno)); - goto out; + gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, + strerror(op_errno)); + if (op_errno != ENOENT) + local->failed = 1; } - local->op_ret = 0; - local->preparent = *preparent; - local->postparent = *postparent; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - return 0; -out: - STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + } + UNLOCK(&frame->lock); - return 0; + if (callcnt == 1) { + if (local->failed) + goto out; + STACK_WIND(frame, stripe_first_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, &local->loc, local->flags, + NULL); + } + return 0; +out: + STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } +int32_t +stripe_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + /* don't delete a directory if any of the subvolume is down */ + if (priv->nodes_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + loc_copy(&local->loc, loc); + local->flags = flags; + local->call_count = priv->child_count; + trav = trav->next; /* skip the first child */ + + while (trav) { + STACK_WIND(frame, stripe_rmdir_cbk, trav->xlator, + trav->xlator->fops->rmdir, loc, flags, NULL); 
+ trav = trav->next; + } - + return 0; +err: + STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} int32_t -stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_mknod_ifreg_fail_unlink_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if (op_errno != ENOENT) { - local->failed = 1; - local->op_ret = op_ret; - } - } - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK(&frame->lock); - if (callcnt == 1) { - if (local->failed) { - op_errno = local->op_errno; - goto out; - } - STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->unlink, &local->loc, - local->xflag, local->xdata); - } - return 0; + if (!callcnt) { + STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, &local->preparent, + &local->postparent, NULL); + } out: - STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); - - return 0; + return 0; } +/** + */ int32_t -stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, - int xflag, dict_t *xdata) +stripe_mknod_ifreg_setxattr_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } + prev = cookie; + priv = this->private; + local = frame->local; - /* Don't unlink a file if a node is down */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; } - local->op_ret = -1; - loc_copy (&local->loc, loc); - local->xflag = xflag; - - if (xdata) - local->xdata = dict_ref (xdata); - - frame->local = local; - local->call_count = priv->child_count; - trav = 
trav->next; /* Skip the first child */ + } + UNLOCK(&frame->lock); - while (trav) { - STACK_WIND (frame, stripe_unlink_cbk, - trav->xlator, trav->xlator->fops->unlink, - loc, xflag, xdata); + if (!callcnt) { + if (local->op_ret == -1) { + local->call_count = priv->child_count; + while (trav) { + STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk, + trav->xlator, trav->xlator->fops->unlink, + &local->loc, 0, NULL); trav = trav->next; + } + return 0; } - return 0; -err: - STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, &local->preparent, + &local->postparent, NULL); + } +out: + return 0; } - int32_t -stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno,struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_mknod_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - stripe_local_t *local = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - if (!this || !frame || !frame->local) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - op_errno = EINVAL; - goto err; - } + prev = cookie; + priv = this->private; + local = frame->local; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; if (op_ret == -1) { - goto err; + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; + local->op_errno = op_errno; } + if (op_ret >= 0) { + local->op_ret = op_ret; - local = frame->local; - local->op_ret = 0; + /* Can be used as a mechanism to understand if mknod + was successful in at least one place */ + if (gf_uuid_is_null(local->ia_gfid)) + gf_uuid_copy(local->ia_gfid, buf->ia_gfid); - local->call_count--; /* First child successful */ + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict"); - local->preparent = *preparent; - local->postparent = *postparent; - local->preparent_size = preparent->ia_size; - local->postparent_size = postparent->ia_size; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; - STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - return 0; -err: - STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + } + } + UNLOCK(&frame->lock); + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if ((local->op_ret == -1) && !gf_uuid_is_null(local->ia_gfid)) { + /* ia_gfid set means, at least on 
one node 'mknod' + is successful */ + local->call_count = priv->child_count; + trav = this->children; + while (trav) { + STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk, + trav->xlator, trav->xlator->fops->unlink, + &local->loc, 0, NULL); + trav = trav->next; + } + return 0; + } + + if (local->op_ret != -1) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx); + } + STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, &local->preparent, + &local->postparent, NULL); + } +out: + return 0; } int32_t -stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_mknod_first_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + int i = 1; + dict_t *dict = NULL; + int ret = 0; + int need_unref = 0; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + prev = cookie; + priv = this->private; + local = frame->local; + trav = this->children; - prev = cookie; - local = frame->local; + local->call_count--; - LOCK (&frame->lock); - { - callcnt = --local->call_count; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->failed = 1; + local->op_errno = op_errno; + goto out; + } - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", - prev->this->name, strerror (op_errno)); - if (op_errno != ENOENT) - local->failed = 1; - } - } - UNLOCK (&frame->lock); - - if (callcnt == 1) { - if (local->failed) - goto out; - STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rmdir, &local->loc, - local->flags, NULL); + local->op_ret = op_ret; + + local->stbuf = *buf; + local->preparent = *preparent; + local->postparent = *postparent; + + if (gf_uuid_is_null(local->ia_gfid)) + gf_uuid_copy(local->ia_gfid, buf->ia_gfid); + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + + trav = trav->next; + while (trav) { + if (priv->xattr_supported) { + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", + local->loc.path); + } + need_unref = 1; + + dict_copy(local->xattr, dict); + + ret = stripe_xattr_request_build(this, dict, local->stripe_size, + priv->child_count, i, + priv->coalesce); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Failed to build xattr 
request"); + + } else { + dict = local->xattr; } - return 0; + + STACK_WIND(frame, stripe_mknod_ifreg_cbk, trav->xlator, + trav->xlator->fops->mknod, &local->loc, local->mode, + local->rdev, 0, dict); + trav = trav->next; + i++; + + if (dict && need_unref) + dict_unref(dict); + } + + return 0; + out: - STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + + STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } int32_t -stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) +stripe_single_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; + STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; +} - /* don't delete a directory if any of the subvolume is down */ +int +stripe_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + int32_t op_errno = EINVAL; + int32_t i = 0; + dict_t *dict = NULL; + int ret = 0; + int need_unref = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + if (S_ISREG(mode)) { + /* NOTE: on older kernels (older than 2.6.9), + creat() fops is sent as mknod() + open(). 
Hence handling + S_IFREG files is necessary */ if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; + gf_log(this->name, GF_LOG_WARNING, "Some node down, returning EIO"); + op_errno = EIO; + goto err; } /* Initialization */ - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); if (!local) { - op_errno = ENOMEM; - goto err; + op_errno = ENOMEM; + goto err; } local->op_ret = -1; + local->op_errno = ENOTCONN; + local->stripe_size = stripe_get_matching_bs(loc->path, priv); frame->local = local; - loc_copy (&local->loc, loc); - local->flags = flags; + local->inode = inode_ref(loc->inode); + loc_copy(&local->loc, loc); + local->xattr = dict_copy_with_ref(xdata, NULL); + local->mode = mode; + local->umask = umask; + local->rdev = rdev; + + /* Every time in stripe lookup, all child nodes should + be looked up */ local->call_count = priv->child_count; - trav = trav->next; /* skip the first child */ - while (trav) { - STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator, - trav->xlator->fops->rmdir, loc, flags, NULL); - trav = trav->next; + if (priv->xattr_supported) { + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", + loc->path); + } + need_unref = 1; + + dict_copy(xdata, dict); + + ret = stripe_xattr_request_build(this, dict, local->stripe_size, + priv->child_count, i, + priv->coalesce); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "failed to build xattr request"); + } else { + dict = xdata; } + STACK_WIND(frame, stripe_mknod_first_ifreg_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, + dict); + + if (dict && need_unref) + dict_unref(dict); return 0; + } + + STACK_WIND(frame, stripe_single_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + + return 0; err: - STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } - int32_t -stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!this || !frame || !frame->local) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - local = frame->local; + prev = cookie; + local = frame->local; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + callcnt = --local->call_count; - if (!callcnt) { - STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, - &local->preparent, &local->postparent, NULL); + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; } + + if (op_ret >= 0) { + local->op_ret = 0; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += 
preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + } + } + UNLOCK(&frame->lock); + + if (!callcnt) { + if (local->failed != -1) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } + STRIPE_STACK_UNWIND(mkdir, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, &local->preparent, + &local->postparent, NULL); + } out: - return 0; + return 0; } - -/** - */ int32_t -stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +stripe_first_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; - prev = cookie; - priv = this->private; - local = frame->local; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } - } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->op_ret == -1) { - local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, - stripe_mknod_ifreg_fail_unlink_cbk, - trav->xlator, - trav->xlator->fops->unlink, - &local->loc, 0, NULL); - trav = trav->next; - } - return 0; - } + prev = cookie; + local = frame->local; + trav = this->children; - STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, - &local->preparent, &local->postparent, NULL); - } -out: - return 0; -} + local->call_count--; /* first child is successful */ + trav = trav->next; /* skip first child */ -int32_t -stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + goto out; + } - prev = cookie; - priv = this->private; - local = frame->local; + local->op_ret = 0; - LOCK (&frame->lock); - { - callcnt = 
--local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - local->op_errno = op_errno; - } - if (op_ret >= 0) { - local->op_ret = op_ret; - - /* Can be used as a mechanism to understand if mknod - was successful in at least one place */ - if (gf_uuid_is_null (local->ia_gfid)) - gf_uuid_copy (local->ia_gfid, buf->ia_gfid); - - if (stripe_ctx_handle(this, prev, local, xdata)) - gf_log(this->name, GF_LOG_ERROR, - "Error getting fctx info from dict"); - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if ((local->op_ret == -1) && !gf_uuid_is_null (local->ia_gfid)) { - /* ia_gfid set means, at least on one node 'mknod' - is successful */ - local->call_count = priv->child_count; - trav = this->children; - while (trav) { - STACK_WIND (frame, - stripe_mknod_ifreg_fail_unlink_cbk, - trav->xlator, - trav->xlator->fops->unlink, - &local->loc, 0, NULL); - trav = trav->next; - } - return 0; - } + local->inode = inode_ref(inode); + local->stbuf = *buf; + local->postparent = *postparent; + local->preparent = *preparent; + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; - if (local->op_ret != -1) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - inode_ctx_put (local->inode, this, - (uint64_t)(long) local->fctx); + local->stbuf_size = buf->ia_size; + local->preparent_size = preparent->ia_size; + local->postparent_size = postparent->ia_size; - } - STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, - &local->preparent, &local->postparent, NULL); - } + while (trav) { + STACK_WIND(frame, stripe_mkdir_cbk, trav->xlator, + trav->xlator->fops->mkdir, &local->loc, local->mode, + local->umask, local->xdata); + trav = trav->next; + } + return 0; out: - return 0; + STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + + return 0; } +int +stripe_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + local->call_count = 
priv->child_count; + if (xdata) + local->xdata = dict_ref(xdata); + local->mode = mode; + local->umask = umask; + loc_copy(&local->loc, loc); + frame->local = local; + + /* Every time in stripe lookup, all child nodes should be looked up */ + STACK_WIND(frame, stripe_first_mkdir_cbk, trav->xlator, + trav->xlator->fops->mkdir, loc, mode, umask, xdata); + + return 0; +err: + STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; +} int32_t -stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - int i = 1; - dict_t *dict = NULL; - int ret = 0; - int need_unref = 0; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + stripe_fd_ctx_t *fctx = NULL; - prev = cookie; - priv = this->private; - local = frame->local; - trav = this->children; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - local->call_count--; + prev = cookie; + local = frame->local; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror (op_errno)); + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) local->failed = 1; - local->op_errno = op_errno; - goto out; } - local->op_ret = op_ret; - - local->stbuf = *buf; - local->preparent = *preparent; - local->postparent = *postparent; + if (op_ret >= 0) { + local->op_ret = 0; - if (gf_uuid_is_null (local->ia_gfid)) - gf_uuid_copy (local->ia_gfid, buf->ia_gfid); - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; + if (IA_ISREG(inode->ia_type)) { + inode_ctx_get(inode, this, (uint64_t *)&fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get stripe context"); + op_ret = -1; + op_errno = EINVAL; + } + } + + if (FIRST_CHILD(this) == prev->this) { + local->inode = inode_ref(inode); + local->stbuf = *buf; + local->postparent = *postparent; + local->preparent = *preparent; + } + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + correct_file_size(buf, fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + } + } + UNLOCK(&frame->lock); + + if (!callcnt) { + if (local->failed) + 
local->op_ret = -1; + + if (local->op_ret != -1) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } + STRIPE_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, &local->preparent, + &local->postparent, NULL); + } +out: + return 0; +} +int32_t +stripe_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(oldloc, err); + VALIDATE_OR_GOTO(oldloc->path, err); + VALIDATE_OR_GOTO(oldloc->inode, err); + + priv = this->private; + trav = this->children; + + /* If any one node is down, don't allow link operation */ + if (priv->nodes_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + /* Every time in stripe lookup, all child + nodes should be looked up */ + while (trav) { + STACK_WIND(frame, stripe_link_cbk, trav->xlator, + trav->xlator->fops->link, oldloc, newloc, NULL); trav = trav->next; - while (trav) { - if (priv->xattr_supported) { - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate dict %s", local->loc.path); - } - need_unref = 1; - - dict_copy (local->xattr, dict); - - ret = stripe_xattr_request_build (this, dict, - local->stripe_size, - priv->child_count, i, - priv->coalesce); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Failed to build xattr request"); - - } else { - dict = local->xattr; - } + } - STACK_WIND (frame, stripe_mknod_ifreg_cbk, - trav->xlator, trav->xlator->fops->mknod, - &local->loc, local->mode, local->rdev, 0, dict); - trav = trav->next; - i++; + return 0; +err: + STRIPE_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; +} - if (dict && need_unref) - dict_unref (dict); - } +int32_t +stripe_create_fail_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; - return 0; + if (!this || !frame || !frame->local) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } -out: + local = frame->local; - STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; -} + LOCK(&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK(&frame->lock); + if (!callcnt) { + STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno, + local->fd, local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); + } +out: + return 0; +} int32_t -stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct 
iatt *postparent, dict_t *xdata) { - STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); - return 0; -} + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; + priv = this->private; + local = frame->local; -int -stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) -{ - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - int32_t op_errno = EINVAL; - int32_t i = 0; - dict_t *dict = NULL; - int ret = 0; - int need_unref = 0; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->failed = 1; + local->op_errno = op_errno; } - if (S_ISREG(mode)) { - /* NOTE: on older kernels (older than 2.6.9), - creat() fops is sent as mknod() + open(). Hence handling - S_IFREG files is necessary */ - if (priv->nodes_down) { - gf_log (this->name, GF_LOG_WARNING, - "Some node down, returning EIO"); - op_errno = EIO; - goto err; - } + if (op_ret >= 0) { + if (IA_ISREG(buf->ia_type)) { + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from " + "dict"); + } - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs (loc->path, priv); - frame->local = local; - local->inode = inode_ref (loc->inode); - loc_copy (&local->loc, loc); - local->xattr = dict_copy_with_ref (xdata, NULL); - local->mode = mode; - local->umask = umask; - local->rdev = rdev; - - /* Every time in stripe lookup, all child nodes should - be looked up */ - local->call_count = priv->child_count; - - if (priv->xattr_supported) { - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate dict %s", loc->path); - } - need_unref = 1; - - dict_copy (xdata, dict); - - ret = stripe_xattr_request_build (this, dict, - local->stripe_size, - priv->child_count, - i, priv->coalesce); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "failed to build xattr request"); - } else { - dict = xdata; - } + local->op_ret = op_ret; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; - STACK_WIND (frame, stripe_mknod_first_ifreg_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod, - loc, mode, rdev, umask, dict); + correct_file_size(buf, local->fctx, prev); - if (dict && need_unref) - dict_unref (dict); - return 0; + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; } + } + UNLOCK(&frame->lock); - STACK_WIND (frame, stripe_single_mknod_cbk, - 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev, umask, xdata); + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - return 0; -err: - STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; -} - - -int32_t -stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + if (local->op_ret == -1) { + local->call_count = priv->child_count; + trav = this->children; + while (trav) { + STACK_WIND(frame, stripe_create_fail_unlink_cbk, trav->xlator, + trav->xlator->fops->unlink, &local->loc, 0, NULL); + trav = trav->next; + } - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; + return 0; } - prev = cookie; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } + if (local->op_ret >= 0) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; - if (op_ret >= 0) { - local->op_ret = 0; + stripe_copy_xl_array(local->fctx->xl_array, priv->xl_array, + local->fctx->stripe_count); + inode_ctx_put(local->inode, this, (uint64_t)local->fctx); + } - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; + /* Create itself has failed.. 
so return + without setxattring */ + STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno, + local->fd, local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); + } - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed != -1) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret, - local->op_errno, local->inode, - &local->stbuf, &local->preparent, - &local->postparent, NULL); - } out: - return 0; + return 0; } - int32_t -stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +stripe_first_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - trav = this->children; - - local->call_count--; /* first child is successful */ - trav = trav->next; /* skip first child */ - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - goto out; - } - - local->op_ret = 0; - - local->inode = inode_ref (inode); - local->stbuf = *buf; - local->postparent = *postparent; - local->preparent = *preparent; - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + int i = 1; + dict_t *dict = NULL; + loc_t *loc = NULL; + int32_t need_unref = 0; + int32_t ret = -1; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + priv = this->private; + local = frame->local; + trav = this->children; + loc = &local->loc; + + --local->call_count; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->failed = 1; + local->op_errno = op_errno; + } + + local->op_ret = 0; + /* Get the mapping in inode private */ + /* Get the stat buf right */ + local->stbuf = *buf; + local->preparent = *preparent; + local->postparent = *postparent; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) local->preparent_size = preparent->ia_size; + if 
(local->postparent_size < postparent->ia_size) local->postparent_size = postparent->ia_size; - while (trav) { - STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator, - trav->xlator->fops->mkdir, &local->loc, local->mode, - local->umask, local->xdata); - trav = trav->next; - } - return 0; -out: - STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); - - return 0; - -} - - -int -stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) -{ - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->call_count = priv->child_count; - if (xdata) - local->xdata = dict_ref (xdata); - local->mode = mode; - local->umask = umask; - loc_copy (&local->loc, loc); - frame->local = local; - - /* Every time in stripe lookup, all child nodes should be looked up */ - STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator, - trav->xlator->fops->mkdir, loc, mode, umask, xdata); - - return 0; -err: - STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; -} - - -int32_t -stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - stripe_fd_ctx_t *fctx = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } - - if (op_ret >= 0) { - local->op_ret = 0; - - if (IA_ISREG(inode->ia_type)) { - inode_ctx_get(inode, this, (uint64_t *) &fctx); - if (!fctx) { - gf_log(this->name, GF_LOG_ERROR, - "failed to get stripe context"); - op_ret = -1; - op_errno = EINVAL; - } - } - - if (FIRST_CHILD(this) == prev->this) { - local->inode = inode_ref (inode); - local->stbuf = *buf; - local->postparent = *postparent; - local->preparent = *preparent; - } - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - correct_file_size(buf, fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = 
local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - STRIPE_STACK_UNWIND (link, frame, local->op_ret, - local->op_errno, local->inode, - &local->stbuf, &local->preparent, - &local->postparent, NULL); - } -out: - return 0; -} - -int32_t -stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (oldloc, err); - VALIDATE_OR_GOTO (oldloc->path, err); - VALIDATE_OR_GOTO (oldloc->inode, err); - - priv = this->private; - trav = this->children; - - /* If any one node is down, don't allow link operation */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } + if (local->failed) local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - /* Every time in stripe lookup, all child - nodes should be looked up */ - while (trav) { - STACK_WIND (frame, stripe_link_cbk, - trav->xlator, trav->xlator->fops->link, - oldloc, newloc, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + if (local->op_ret == -1) { + local->call_count = 1; + STACK_WIND(frame, stripe_create_fail_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, 0, NULL); return 0; -} + } -int32_t -stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - - if (!this || !frame || !frame->local) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - } - UNLOCK (&frame->lock); - - if (!callcnt) { - STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno, - local->fd, local->inode, &local->stbuf, - &local->preparent, &local->postparent, NULL); - } -out: - return 0; -} - - -int32_t -stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - priv = this->private; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->failed = 1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) { - if (IA_ISREG(buf->ia_type)) { - if (stripe_ctx_handle(this, prev, local, xdata)) - gf_log(this->name, GF_LOG_ERROR, - "Error getting fctx info from " - "dict"); - } - - local->op_ret = op_ret; - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += 
postparent->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret == -1) { - local->call_count = priv->child_count; - trav = this->children; - while (trav) { - STACK_WIND (frame, - stripe_create_fail_unlink_cbk, - trav->xlator, - trav->xlator->fops->unlink, - &local->loc, 0, NULL); - trav = trav->next; - } - - return 0; - } - - if (local->op_ret >= 0) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - - stripe_copy_xl_array(local->fctx->xl_array, - priv->xl_array, - local->fctx->stripe_count); - inode_ctx_put(local->inode, this, - (uint64_t) local->fctx); - } - - /* Create itself has failed.. so return - without setxattring */ - STRIPE_STACK_UNWIND (create, frame, local->op_ret, - local->op_errno, local->fd, - local->inode, &local->stbuf, - &local->preparent, &local->postparent, NULL); - } - -out: - return 0; -} - - - -int32_t -stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - int i = 1; - dict_t *dict = NULL; - loc_t *loc = NULL; - int32_t need_unref = 0; - int32_t ret = -1; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - priv = this->private; - local = frame->local; - trav = this->children; - loc = &local->loc; - - --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->failed = 1; - local->op_errno = op_errno; - } - - local->op_ret = 0; - /* Get the mapping in inode private */ - /* Get the stat buf right */ - local->stbuf = *buf; - local->preparent = *preparent; - local->postparent = *postparent; - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - - if (local->failed) - local->op_ret = -1; - - if (local->op_ret == -1) { - local->call_count = 1; - STACK_WIND (frame, stripe_create_fail_unlink_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink, - &local->loc, 0, NULL); - return 0; - } - - if (local->op_ret >= 0) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; 
- local->stbuf.ia_blocks = local->stbuf_blocks; + if (local->op_ret >= 0) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } + + /* Send a setxattr request to nodes where the + files are created */ + trav = trav->next; + while (trav) { + if (priv->xattr_supported) { + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", + loc->path); + } + need_unref = 1; + + dict_copy(local->xattr, dict); + + ret = stripe_xattr_request_build(this, dict, local->stripe_size, + priv->child_count, i, + priv->coalesce); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "failed to build xattr request"); + } else { + dict = local->xattr; } - /* Send a setxattr request to nodes where the - files are created */ + STACK_WIND(frame, stripe_create_cbk, trav->xlator, + trav->xlator->fops->create, &local->loc, local->flags, + local->mode, local->umask, local->fd, dict); trav = trav->next; - while (trav) { - if (priv->xattr_supported) { - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate dict %s", loc->path); - } - need_unref = 1; - - dict_copy (local->xattr, dict); - - ret = stripe_xattr_request_build (this, dict, - local->stripe_size, - priv->child_count, - i, priv->coalesce); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "failed to build xattr request"); - } else { - dict = local->xattr; - } - - STACK_WIND (frame, stripe_create_cbk, trav->xlator, - trav->xlator->fops->create, &local->loc, - local->flags, local->mode, local->umask, local->fd, - dict); - trav = trav->next; - if (need_unref && dict) - dict_unref (dict); - i++; - } + if (need_unref && dict) + dict_unref(dict); + i++; + } out: - return 0; + return 0; } - - /** * stripe_create - If a block-size is specified for the 'name', create the * file in all the child nodes. If not, create it in only first child. @@ -2324,2797 +2250,2709 @@ out: * @name- complete path of the file to be created. 
*/ int32_t -stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +stripe_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - int32_t op_errno = EINVAL; - int ret = 0; - int need_unref = 0; - int i = 0; - dict_t *dict = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - - /* files created in O_APPEND mode does not allow lseek() on fd */ - flags &= ~O_APPEND; - - if (priv->first_child_down || priv->nodes_down) { - gf_log (this->name, GF_LOG_DEBUG, - "First node down, returning EIO"); - op_errno = EIO; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs (loc->path, priv); - frame->local = local; - local->inode = inode_ref (loc->inode); - loc_copy (&local->loc, loc); - local->fd = fd_ref (fd); - local->flags = flags; - local->mode = mode; - local->umask = umask; - if (xdata) - local->xattr = dict_ref (xdata); - - local->call_count = priv->child_count; - /* Send a setxattr request to nodes where the - files are created */ - - if (priv->xattr_supported) { - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate dict %s", loc->path); - } - need_unref = 1; - - dict_copy (xdata, dict); - - ret = stripe_xattr_request_build (this, dict, - local->stripe_size, - priv->child_count, - i, priv->coalesce); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "failed to build xattr request"); - } else { - dict = xdata; - } - - - STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->create, loc, flags, mode, - umask, fd, dict); + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + int32_t op_errno = EINVAL; + int ret = 0; + int need_unref = 0; + int i = 0; + dict_t *dict = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + + /* files created in O_APPEND mode does not allow lseek() on fd */ + flags &= ~O_APPEND; + + if (priv->first_child_down || priv->nodes_down) { + gf_log(this->name, GF_LOG_DEBUG, "First node down, returning EIO"); + op_errno = EIO; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->stripe_size = stripe_get_matching_bs(loc->path, priv); + frame->local = local; + local->inode = inode_ref(loc->inode); + loc_copy(&local->loc, loc); + local->fd = fd_ref(fd); + local->flags = flags; + local->mode = mode; + local->umask = umask; + if (xdata) + local->xattr = dict_ref(xdata); + + local->call_count = priv->child_count; + /* Send a setxattr request to nodes where the + files are created */ + + if (priv->xattr_supported) { + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", + loc->path); + } + need_unref = 1; + + dict_copy(xdata, dict); + + ret = stripe_xattr_request_build(this, dict, local->stripe_size, + priv->child_count, i, 
priv->coalesce); + if (ret) + gf_log(this->name, GF_LOG_ERROR, "failed to build xattr request"); + } else { + dict = xdata; + } - if (need_unref && dict) - dict_unref (dict); + STACK_WIND(frame, stripe_first_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + dict); + if (need_unref && dict) + dict_unref(dict); - return 0; + return 0; err: - STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL, xdata); - return 0; + STRIPE_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, xdata); + return 0; } int32_t -stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +stripe_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - if (op_ret == -1) { + prev = cookie; + local = frame->local; - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - local->op_errno = op_errno; - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; - if (op_ret >= 0) - local->op_ret = op_ret; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; + local->op_errno = op_errno; } - UNLOCK (&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; + if (op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK(&frame->lock); - STRIPE_STACK_UNWIND (open, frame, local->op_ret, - local->op_errno, local->fd, xdata); - } + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + STRIPE_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, + local->fd, xdata); + } out: - return 0; + return 0; } - /** * stripe_open - */ int32_t -stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, fd_t *fd, dict_t *xdata) +stripe_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - /* files opened in O_APPEND mode does not allow lseek() on fd */ - flags &= ~O_APPEND; - - local->fd = fd_ref (fd); - frame->local = local; - loc_copy (&local->loc, loc); - - /* Striped files */ - local->flags = flags; - local->call_count = priv->child_count; - local->stripe_size = stripe_get_matching_bs 
(loc->path, priv); - - while (trav) { - STACK_WIND (frame, stripe_open_cbk, trav->xlator, - trav->xlator->fops->open, - &local->loc, local->flags, local->fd, - xdata); - trav = trav->next; - } - return 0; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + /* files opened in O_APPEND mode does not allow lseek() on fd */ + flags &= ~O_APPEND; + + local->fd = fd_ref(fd); + frame->local = local; + loc_copy(&local->loc, loc); + + /* Striped files */ + local->flags = flags; + local->call_count = priv->child_count; + local->stripe_size = stripe_get_matching_bs(loc->path, priv); + + while (trav) { + STACK_WIND(frame, stripe_open_cbk, trav->xlator, + trav->xlator->fops->open, &local->loc, local->flags, + local->fd, xdata); + trav = trav->next; + } + return 0; err: - STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL); + return 0; } - int32_t -stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +stripe_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) - local->op_ret = op_ret; - } - UNLOCK (&frame->lock); - - if (!callcnt) { - STRIPE_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd, NULL); - } -out: - return 0; -} - - -int32_t -stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->call_count = priv->child_count; - local->fd = fd_ref (fd); + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - while (trav) { - STACK_WIND (frame, stripe_opendir_cbk, trav->xlator, - trav->xlator->fops->opendir, loc, fd, NULL); - trav = trav->next; - } + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - return 0; -err: - STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, 
NULL, NULL); - return 0; -} + prev = cookie; + local = frame->local; -int32_t -stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + LOCK(&frame->lock); + { + callcnt = --local->call_count; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; } - prev = cookie; - local = frame->local; + if (op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK(&frame->lock); - LOCK (&frame->lock); - { - callcnt = --local->call_count; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } - if (op_ret >= 0) { - if (FIRST_CHILD(this) == prev->this) { - /* First successful call, copy the *lock */ - local->op_ret = op_ret; - local->lock = *lock; - } - } - } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - STRIPE_STACK_UNWIND (lk, frame, local->op_ret, - local->op_errno, &local->lock, NULL); - } + if (!callcnt) { + STRIPE_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno, + local->fd, NULL); + } out: - return 0; + return 0; } int32_t -stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) +stripe_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - trav = this->children; - priv = this->private; - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_lk_cbk, trav->xlator, - trav->xlator->fops->lk, fd, cmd, lock, NULL); - trav = trav->next; - } + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->call_count = priv->child_count; + local->fd = fd_ref(fd); + + while (trav) { + STACK_WIND(frame, stripe_opendir_cbk, trav->xlator, + trav->xlator->fops->opendir, loc, fd, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL); + return 0; } - int32_t -stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) 
+stripe_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } - if (op_ret >= 0) - local->op_ret = op_ret; - } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - STRIPE_STACK_UNWIND (flush, frame, local->op_ret, - local->op_errno, NULL); - } -out: - return 0; -} - -int32_t -stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_flush_cbk, trav->xlator, - trav->xlator->fops->flush, fd, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL); - return 0; -} - + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } -int32_t -stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + prev = cookie; + local = frame->local; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; + LOCK(&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; } - - prev = cookie; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } - if (op_ret >= 0) { - local->op_ret = op_ret; - if (FIRST_CHILD(this) == prev->this) { - local->pre_buf = *prebuf; - local->post_buf = *postbuf; - } - local->prebuf_blocks += prebuf->ia_blocks; - local->postbuf_blocks += postbuf->ia_blocks; - - correct_file_size(prebuf, local->fctx, prev); - correct_file_size(postbuf, local->fctx, prev); - - if (local->prebuf_size < prebuf->ia_size) - local->prebuf_size = 
prebuf->ia_size; - - if (local->postbuf_size < postbuf->ia_size) - local->postbuf_size = postbuf->ia_size; - } + if (op_ret >= 0) { + if (FIRST_CHILD(this) == prev->this) { + /* First successful call, copy the *lock */ + local->op_ret = op_ret; + local->lock = *lock; + } } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->pre_buf.ia_blocks = local->prebuf_blocks; - local->pre_buf.ia_size = local->prebuf_size; - local->post_buf.ia_blocks = local->postbuf_blocks; - local->post_buf.ia_size = local->postbuf_size; - } + } + UNLOCK(&frame->lock); - STRIPE_STACK_UNWIND (fsync, frame, local->op_ret, - local->op_errno, &local->pre_buf, - &local->post_buf, NULL); - } + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + STRIPE_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, + &local->lock, NULL); + } out: - return 0; + return 0; } int32_t -stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata) +stripe_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - frame->local = local; - - inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); - if (!fctx) { - op_errno = EINVAL; - goto err; - } - local->fctx = fctx; - local->op_ret = -1; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_fsync_cbk, trav->xlator, - trav->xlator->fops->fsync, fd, flags, NULL); - trav = trav->next; - } + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + trav = this->children; + priv = this->private; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND(frame, stripe_lk_cbk, trav->xlator, trav->xlator->fops->lk, + fd, cmd, lock, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); + return 0; } int32_t -stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) +stripe_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - prev = cookie; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - 
prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD (this))) - local->failed = 1; - } - - if (op_ret == 0) { - local->op_ret = 0; - - if (FIRST_CHILD(this) == prev->this) - local->stbuf = *buf; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - local->stbuf_blocks += buf->ia_blocks; + prev = cookie; + local = frame->local; - correct_file_size(buf, local->fctx, prev); + LOCK(&frame->lock); + { + callcnt = --local->call_count; - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, + strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } + if (op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK(&frame->lock); - STRIPE_STACK_UNWIND (fstat, frame, local->op_ret, - local->op_errno, &local->stbuf, NULL); - } + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + STRIPE_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno, NULL); + } out: - return 0; + return 0; } int32_t -stripe_fstat (call_frame_t *frame, - xlator_t *this, - fd_t *fd, dict_t *xdata) +stripe_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - if (IA_ISREG(fd->inode->ia_type)) { - inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } - - while (trav) { - STACK_WIND (frame, stripe_fstat_cbk, trav->xlator, - trav->xlator->fops->fstat, fd, NULL); - trav = trav->next; - } + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND(frame, stripe_flush_cbk, trav->xlator, + trav->xlator->fops->flush, fd, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(flush, frame, -1, op_errno, NULL); + return 0; } - int32_t -stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) +stripe_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt 
*prebuf, + struct iatt *postbuf, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int i, eof_idx; - off_t dest_offset, tmp_offset; - int32_t op_errno = 1; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - priv = this->private; + prev = cookie; + local = frame->local; - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); - if (!fctx) { - gf_log(this->name, GF_LOG_ERROR, "no stripe context"); - op_errno = EINVAL; - goto err; - } - if (!fctx->stripe_count) { - gf_log(this->name, GF_LOG_ERROR, "no stripe count"); - op_errno = EINVAL; - goto err; - } - - local->fctx = fctx; - eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; - - for (i = 0; i < fctx->stripe_count; i++) { - if (!fctx->xl_array[i]) { - gf_log(this->name, GF_LOG_ERROR, "no xlator at index " - "%d", i); - op_errno = EINVAL; - goto err; - } - - if (fctx->stripe_coalesce) { - if (i < eof_idx) - tmp_offset = roof(offset, fctx->stripe_size * - fctx->stripe_count); - else if (i > eof_idx) - tmp_offset = floor(offset, fctx->stripe_size * - fctx->stripe_count); - else - tmp_offset = offset; - - dest_offset = coalesced_offset(tmp_offset, - fctx->stripe_size, fctx->stripe_count); - } else { - dest_offset = offset; - } - - STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], - fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, - NULL); - } + LOCK(&frame->lock); + { + callcnt = --local->call_count; - return 0; -err: - STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, + strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; + } + if (op_ret >= 0) { + local->op_ret = op_ret; + if (FIRST_CHILD(this) == prev->this) { + local->pre_buf = *prebuf; + local->post_buf = *postbuf; + } + local->prebuf_blocks += prebuf->ia_blocks; + local->postbuf_blocks += postbuf->ia_blocks; + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); -int32_t -stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + if (local->prebuf_size < prebuf->ia_size) + local->prebuf_size = prebuf->ia_size; - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; + if (local->postbuf_size < postbuf->ia_size) + local->postbuf_size = postbuf->ia_size; } + } + UNLOCK(&frame->lock); - prev = cookie; - local = frame->local; + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned %s", - prev->this->name, strerror (op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || - (prev->this == FIRST_CHILD 
(this))) - local->failed = 1; - } - if (op_ret >= 0) - local->op_ret = op_ret; + if (local->op_ret != -1) { + local->pre_buf.ia_blocks = local->prebuf_blocks; + local->pre_buf.ia_size = local->prebuf_size; + local->post_buf.ia_blocks = local->postbuf_blocks; + local->post_buf.ia_size = local->postbuf_size; } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret, - local->op_errno, NULL); - } + STRIPE_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, + &local->pre_buf, &local->post_buf, NULL); + } out: - return 0; + return 0; } int32_t -stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata) +stripe_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator, - trav->xlator->fops->fsyncdir, fd, flags, NULL); - trav = trav->next; - } + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + + inode_ctx_get(fd->inode, this, (uint64_t *)&fctx); + if (!fctx) { + op_errno = EINVAL; + goto err; + } + local->fctx = fctx; + local->op_ret = -1; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND(frame, stripe_fsync_cbk, trav->xlator, + trav->xlator->fops->fsync, fd, flags, NULL); + trav = trav->next; + } - return 0; + return 0; err: - STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) -{ - int32_t i = 0; - int32_t callcnt = 0; - int32_t count = 0; - stripe_local_t *local = NULL; - struct iovec *vec = NULL; - struct iatt tmp_stbuf = {0,}; - struct iobref *tmp_iobref = NULL; - struct iobuf *iobuf = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - if (op_ret != -1) { - correct_file_size(buf, local->fctx, prev); - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - } - } - UNLOCK (&frame->lock); - - if (!callcnt) { - op_ret = 0; - - /* Keep extra space for filling in '\0's */ - vec = GF_CALLOC ((local->count * 2), sizeof (struct iovec), - gf_stripe_mt_iovec); - if (!vec) { - op_ret = -1; - goto done; - } - - for (i = 0; i < local->wind_count; i++) { - if (local->replies[i].op_ret) { - memcpy ((vec + count), local->replies[i].vector, - 
(local->replies[i].count * sizeof (struct iovec))); - count += local->replies[i].count; - op_ret += local->replies[i].op_ret; - } - if ((local->replies[i].op_ret < - local->replies[i].requested_size) && - (local->stbuf_size > (local->offset + op_ret))) { - /* Fill in 0s here */ - vec[count].iov_len = - (local->replies[i].requested_size - - local->replies[i].op_ret); - iobuf = iobuf_get2 (this->ctx->iobuf_pool, - vec[count].iov_len); - if (!iobuf) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto done; - } - memset (iobuf->ptr, 0, vec[count].iov_len); - vec[count].iov_base = iobuf->ptr; - - iobref_add (local->iobref, iobuf); - iobuf_unref(iobuf); - - op_ret += vec[count].iov_len; - count++; - } - GF_FREE (local->replies[i].vector); - } - - /* ENOENT signals EOF to the NFS-server */ - if (op_ret != -1 && op_ret < local->readv_size && - (local->offset + op_ret == buf->ia_size)) - op_errno = ENOENT; - - /* FIXME: notice that st_ino, and st_dev (gen) will be - * different than what inode will have. Make sure this doesn't - * cause any bugs at higher levels */ - memcpy (&tmp_stbuf, &local->replies[0].stbuf, - sizeof (struct iatt)); - tmp_stbuf.ia_size = local->stbuf_size; - - done: - GF_FREE (local->replies); - tmp_iobref = local->iobref; - STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec, - count, &tmp_stbuf, tmp_iobref, NULL); - - iobref_unref (tmp_iobref); - GF_FREE (vec); - } -out: - return 0; + STRIPE_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } -/** - * stripe_readv_cbk - get all the striped reads, and order it properly, send it - * to above layer after putting it in a single vector. - */ int32_t -stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +stripe_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - int32_t index = 0; - int32_t callcnt = 0; - int32_t final_count = 0; - int32_t need_to_check_proper_size = 0; - call_frame_t *mframe = NULL; - stripe_local_t *mlocal = NULL; - stripe_local_t *local = NULL; - struct iovec *final_vec = NULL; - struct iatt tmp_stbuf = {0,}; - struct iatt *tmp_stbuf_p = NULL; //need it for a warning - struct iobref *tmp_iobref = NULL; - stripe_fd_ctx_t *fctx = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto end; - } + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - local = frame->local; - index = local->node_index; - prev = cookie; - mframe = local->orig_frame; - if (!mframe) - goto out; + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - mlocal = mframe->local; - if (!mlocal) - goto out; + prev = cookie; + local = frame->local; - fctx = mlocal->fctx; + LOCK(&frame->lock); + { + callcnt = --local->call_count; - LOCK (&mframe->lock); - { - mlocal->replies[index].op_ret = op_ret; - mlocal->replies[index].op_errno = op_errno; - mlocal->replies[index].requested_size = local->readv_size; - if (op_ret >= 0) { - mlocal->replies[index].stbuf = *stbuf; - mlocal->replies[index].count = count; - mlocal->replies[index].vector = iov_dup (vector, count); - - correct_file_size(stbuf, fctx, prev); - - if (local->stbuf_size < stbuf->ia_size) 
- local->stbuf_size = stbuf->ia_size; - local->stbuf_blocks += stbuf->ia_blocks; - - if (!mlocal->iobref) - mlocal->iobref = iobref_new (); - iobref_merge (mlocal->iobref, iobref); - } - callcnt = ++mlocal->call_count; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; } - UNLOCK(&mframe->lock); - - if (callcnt == mlocal->wind_count) { - op_ret = 0; - - for (index=0; index < mlocal->wind_count; index++) { - /* check whether each stripe returned - * 'expected' number of bytes */ - if (mlocal->replies[index].op_ret == -1) { - op_ret = -1; - op_errno = mlocal->replies[index].op_errno; - break; - } - /* TODO: handle the 'holes' within the read range - properly */ - if (mlocal->replies[index].op_ret < - mlocal->replies[index].requested_size) { - need_to_check_proper_size = 1; - } - - op_ret += mlocal->replies[index].op_ret; - mlocal->count += mlocal->replies[index].count; - } - if (op_ret == -1) - goto done; - if (need_to_check_proper_size) - goto check_size; - final_vec = GF_CALLOC (mlocal->count, sizeof (struct iovec), - gf_stripe_mt_iovec); + if (op_ret == 0) { + local->op_ret = 0; - if (!final_vec) { - op_ret = -1; - goto done; - } + if (FIRST_CHILD(this) == prev->this) + local->stbuf = *buf; - for (index = 0; index < mlocal->wind_count; index++) { - memcpy ((final_vec + final_count), - mlocal->replies[index].vector, - (mlocal->replies[index].count * - sizeof (struct iovec))); - final_count += mlocal->replies[index].count; - GF_FREE (mlocal->replies[index].vector); - } + local->stbuf_blocks += buf->ia_blocks; - /* FIXME: notice that st_ino, and st_dev (gen) will be - * different than what inode will have. Make sure this doesn't - * cause any bugs at higher levels */ - memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf, - sizeof (struct iatt)); - tmp_stbuf.ia_size = local->stbuf_size; - tmp_stbuf.ia_blocks = local->stbuf_blocks; - - done: - /* */ - GF_FREE (mlocal->replies); - tmp_iobref = mlocal->iobref; - /* work around for nfs truncated read. 
Bug 3774 */ - tmp_stbuf_p = &tmp_stbuf; - WIPE (tmp_stbuf_p); - STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec, - final_count, &tmp_stbuf, tmp_iobref, NULL); - - iobref_unref (tmp_iobref); - GF_FREE (final_vec); - } + correct_file_size(buf, local->fctx, prev); - goto out; + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + } + } + UNLOCK(&frame->lock); -check_size: - mlocal->call_count = fctx->stripe_count; + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - for (index = 0; index < fctx->stripe_count; index++) { - STACK_WIND (mframe, stripe_readv_fstat_cbk, - (fctx->xl_array[index]), - (fctx->xl_array[index])->fops->fstat, - mlocal->fd, NULL); + if (local->op_ret != -1) { + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; } + STRIPE_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, + &local->stbuf, NULL); + } + out: - STRIPE_STACK_DESTROY (frame); -end: - return 0; + return 0; } - int32_t -stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, uint32_t flags, dict_t *xdata) +stripe_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int32_t op_errno = EINVAL; - int32_t idx = 0; - int32_t index = 0; - int32_t num_stripe = 0; - int32_t off_index = 0; - size_t frame_size = 0; - off_t rounded_end = 0; - uint64_t tmp_fctx = 0; - uint64_t stripe_size = 0; - off_t rounded_start = 0; - off_t frame_offset = offset; - off_t dest_offset = 0; - stripe_local_t *local = NULL; - call_frame_t *rframe = NULL; - stripe_local_t *rlocal = NULL; - stripe_fd_ctx_t *fctx = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - inode_ctx_get (fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EBADFD; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + if (IA_ISREG(fd->inode->ia_type)) { + inode_ctx_get(fd->inode, this, (uint64_t *)&fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } - STRIPE_VALIDATE_FCTX (fctx, err); + while (trav) { + STACK_WIND(frame, stripe_fstat_cbk, trav->xlator, + trav->xlator->fops->fstat, fd, NULL); + trav = trav->next; + } - if (!stripe_size) { - gf_log (this->name, GF_LOG_DEBUG, - "Wrong stripe size for the file"); - goto err; - } - /* The file is stripe across the child nodes. Send the read request - * to the child nodes appropriately after checking which region of - * the file is in which child node. Always '0-' part of - * the file resides in the first child. 
- */ - rounded_start = floor (offset, stripe_size); - rounded_end = roof (offset+size, stripe_size); - num_stripe = (rounded_end- rounded_start)/stripe_size; + return 0; +err: + STRIPE_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL); + return 0; +} - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; +int32_t +stripe_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; + int i, eof_idx; + off_t dest_offset, tmp_offset; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + priv = this->private; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + inode_ctx_get(fd->inode, this, (uint64_t *)&fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + if (!fctx->stripe_count) { + gf_log(this->name, GF_LOG_ERROR, "no stripe count"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, + "no xlator at index " + "%d", + i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + if (i < eof_idx) + tmp_offset = roof(offset, + fctx->stripe_size * fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, + fctx->stripe_size * fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size, + fctx->stripe_count); + } else { + dest_offset = offset; } - frame->local = local; - /* This is where all the vectors should be copied. 
*/ - local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies), - gf_stripe_mt_stripe_replies); - if (!local->replies) { - op_errno = ENOMEM; - goto err; - } + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, NULL); + } - off_index = (offset / stripe_size) % fctx->stripe_count; - local->wind_count = num_stripe; - local->readv_size = size; - local->offset = offset; - local->fd = fd_ref (fd); - local->fctx = fctx; - - for (index = off_index; index < (num_stripe + off_index); index++) { - rframe = copy_frame (frame); - rlocal = mem_get0 (this->local_pool); - if (!rlocal) { - op_errno = ENOMEM; - goto err; - } + return 0; +err: + STRIPE_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} - frame_size = min (roof (frame_offset+1, stripe_size), - (offset + size)) - frame_offset; +int32_t +stripe_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - rlocal->node_index = index - off_index; - rlocal->orig_frame = frame; - rlocal->readv_size = frame_size; - rframe->local = rlocal; - idx = (index % fctx->stripe_count); + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(frame_offset, - stripe_size, fctx->stripe_count); - else - dest_offset = frame_offset; + prev = cookie; + local = frame->local; - STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->readv, - fd, frame_size, dest_offset, flags, xdata); + LOCK(&frame->lock); + { + callcnt = --local->call_count; - frame_offset += frame_size; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, + strerror(op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) + local->failed = 1; } + if (op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK(&frame->lock); - return 0; -err: - if (rframe) - STRIPE_STACK_DESTROY (rframe); + if (!callcnt) { + if (local->failed) + local->op_ret = -1; - STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno, + NULL); + } +out: + return 0; } +int32_t +stripe_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int32_t op_errno = 1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND(frame, stripe_fsyncdir_cbk, trav->xlator, + trav->xlator->fops->fsyncdir, fd, flags, NULL); + trav = trav->next; + } + + return 0; +err: + STRIPE_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL); + return 0; +} int32_t -stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +stripe_readv_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t 
*this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; - struct stripe_replies *reply = NULL; - int32_t i = 0; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + int32_t i = 0; + int32_t callcnt = 0; + int32_t count = 0; + stripe_local_t *local = NULL; + struct iovec *vec = NULL; + struct iatt tmp_stbuf = { + 0, + }; + struct iobref *tmp_iobref = NULL; + struct iobuf *iobuf = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + local = frame->local; + prev = cookie; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + if (op_ret != -1) { + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + } + } + UNLOCK(&frame->lock); + + if (!callcnt) { + op_ret = 0; + + /* Keep extra space for filling in '\0's */ + vec = GF_CALLOC((local->count * 2), sizeof(struct iovec), + gf_stripe_mt_iovec); + if (!vec) { + op_ret = -1; + goto done; + } + + for (i = 0; i < local->wind_count; i++) { + if (local->replies[i].op_ret) { + memcpy((vec + count), local->replies[i].vector, + (local->replies[i].count * sizeof(struct iovec))); + count += local->replies[i].count; + op_ret += local->replies[i].op_ret; + } + if ((local->replies[i].op_ret < local->replies[i].requested_size) && + (local->stbuf_size > (local->offset + op_ret))) { + /* Fill in 0s here */ + vec[count].iov_len = (local->replies[i].requested_size - + local->replies[i].op_ret); + iobuf = iobuf_get2(this->ctx->iobuf_pool, vec[count].iov_len); + if (!iobuf) { + gf_log(this->name, GF_LOG_ERROR, "Out of memory."); + op_ret = -1; + op_errno = ENOMEM; + goto done; + } + memset(iobuf->ptr, 0, vec[count].iov_len); + vec[count].iov_base = iobuf->ptr; - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; + iobref_add(local->iobref, iobuf); + iobuf_unref(iobuf); - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; + op_ret += vec[count].iov_len; + count++; + } + GF_FREE(local->replies[i].vector); + } + + /* ENOENT signals EOF to the NFS-server */ + if (op_ret != -1 && op_ret < local->readv_size && + (local->offset + op_ret == buf->ia_size)) + op_errno = ENOENT; + + /* FIXME: notice that st_ino, and st_dev (gen) will be + * different than what inode will have. Make sure this doesn't + * cause any bugs at higher levels */ + memcpy(&tmp_stbuf, &local->replies[0].stbuf, sizeof(struct iatt)); + tmp_stbuf.ia_size = local->stbuf_size; + + done: + GF_FREE(local->replies); + tmp_iobref = local->iobref; + STRIPE_STACK_UNWIND(readv, frame, op_ret, op_errno, vec, count, + &tmp_stbuf, tmp_iobref, NULL); + + iobref_unref(tmp_iobref); + GF_FREE(vec); + } +out: + return 0; +} + +/** + * stripe_readv_cbk - get all the striped reads, and order it properly, send it + * to above layer after putting it in a single vector. 
+ */ +int32_t +stripe_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) +{ + int32_t index = 0; + int32_t callcnt = 0; + int32_t final_count = 0; + int32_t need_to_check_proper_size = 0; + call_frame_t *mframe = NULL; + stripe_local_t *mlocal = NULL; + stripe_local_t *local = NULL; + struct iovec *final_vec = NULL; + struct iatt tmp_stbuf = { + 0, + }; + struct iatt *tmp_stbuf_p = NULL; // need it for a warning + struct iobref *tmp_iobref = NULL; + stripe_fd_ctx_t *fctx = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto end; + } + + local = frame->local; + index = local->node_index; + prev = cookie; + mframe = local->orig_frame; + if (!mframe) + goto out; - mlocal->replies[local->node_index].op_ret = op_ret; - mlocal->replies[local->node_index].op_errno = op_errno; + mlocal = mframe->local; + if (!mlocal) + goto out; - if (op_ret >= 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; + fctx = mlocal->fctx; + + LOCK(&mframe->lock); + { + mlocal->replies[index].op_ret = op_ret; + mlocal->replies[index].op_errno = op_errno; + mlocal->replies[index].requested_size = local->readv_size; + if (op_ret >= 0) { + mlocal->replies[index].stbuf = *stbuf; + mlocal->replies[index].count = count; + mlocal->replies[index].vector = iov_dup(vector, count); + + correct_file_size(stbuf, fctx, prev); + + if (local->stbuf_size < stbuf->ia_size) + local->stbuf_size = stbuf->ia_size; + local->stbuf_blocks += stbuf->ia_blocks; + + if (!mlocal->iobref) + mlocal->iobref = iobref_new(); + iobref_merge(mlocal->iobref, iobref); + } + callcnt = ++mlocal->call_count; + } + UNLOCK(&mframe->lock); + + if (callcnt == mlocal->wind_count) { + op_ret = 0; + + for (index = 0; index < mlocal->wind_count; index++) { + /* check whether each stripe returned + * 'expected' number of bytes */ + if (mlocal->replies[index].op_ret == -1) { + op_ret = -1; + op_errno = mlocal->replies[index].op_errno; + break; + } + /* TODO: handle the 'holes' within the read range + properly */ + if (mlocal->replies[index].op_ret < + mlocal->replies[index].requested_size) { + need_to_check_proper_size = 1; + } + + op_ret += mlocal->replies[index].op_ret; + mlocal->count += mlocal->replies[index].count; + } + if (op_ret == -1) + goto done; + if (need_to_check_proper_size) + goto check_size; + + final_vec = GF_CALLOC(mlocal->count, sizeof(struct iovec), + gf_stripe_mt_iovec); + + if (!final_vec) { + op_ret = -1; + goto done; + } + + for (index = 0; index < mlocal->wind_count; index++) { + memcpy((final_vec + final_count), mlocal->replies[index].vector, + (mlocal->replies[index].count * sizeof(struct iovec))); + final_count += mlocal->replies[index].count; + GF_FREE(mlocal->replies[index].vector); + } + + /* FIXME: notice that st_ino, and st_dev (gen) will be + * different than what inode will have. Make sure this doesn't + * cause any bugs at higher levels */ + memcpy(&tmp_stbuf, &mlocal->replies[0].stbuf, sizeof(struct iatt)); + tmp_stbuf.ia_size = local->stbuf_size; + tmp_stbuf.ia_blocks = local->stbuf_blocks; + + done: + /* */ + GF_FREE(mlocal->replies); + tmp_iobref = mlocal->iobref; + /* work around for nfs truncated read. 
Bug 3774 */ + tmp_stbuf_p = &tmp_stbuf; + WIPE(tmp_stbuf_p); + STRIPE_STACK_UNWIND(readv, mframe, op_ret, op_errno, final_vec, + final_count, &tmp_stbuf, tmp_iobref, NULL); + + iobref_unref(tmp_iobref); + GF_FREE(final_vec); + } + + goto out; - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; +check_size: + mlocal->call_count = fctx->stripe_count; - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); + for (index = 0; index < fctx->stripe_count; index++) { + STACK_WIND(mframe, stripe_readv_fstat_cbk, (fctx->xl_array[index]), + (fctx->xl_array[index])->fops->fstat, mlocal->fd, NULL); + } - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = postbuf->ia_size; - } - } - UNLOCK (&frame->lock); - - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - - /* - * Only return the number of consecutively written bytes up until - * the first error. Only return an error if it occurs first. - * - * When a short write occurs, the application should retry at the - * appropriate offset, at which point we'll potentially pass back - * the error. - */ - for (i = 0, reply = mlocal->replies; i < mlocal->wind_count; - i++, reply++) { - if (reply->op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "reply %d " - "returned error %s", i, - strerror(reply->op_errno)); - if (!mlocal->op_ret) { - mlocal->op_ret = -1; - mlocal->op_errno = reply->op_errno; - } - break; - } - - mlocal->op_ret += reply->op_ret; - - if (reply->op_ret < reply->requested_size) - break; - } - - GF_FREE(mlocal->replies); - - STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret, - mlocal->op_errno, &mlocal->pre_buf, - &mlocal->post_buf, NULL); - } out: - if (frame) - STRIPE_STACK_DESTROY(frame); - return 0; + STRIPE_STACK_DESTROY(frame); +end: + return 0; } int32_t -stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) +stripe_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - struct iovec *tmp_vec = NULL; - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t total_size = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - int32_t tmp_count = count; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - off_t rounded_start = 0; - off_t rounded_end = 0; - int32_t total_chunks = 0; - call_frame_t *wframe = NULL; - stripe_local_t *wlocal = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - inode_ctx_get (fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; + int32_t op_errno = EINVAL; + int32_t idx = 0; + int32_t index = 0; + int32_t num_stripe = 0; + int32_t off_index = 0; + size_t frame_size = 0; + off_t rounded_end = 0; + uint64_t tmp_fctx = 0; + uint64_t stripe_size = 0; + off_t rounded_start = 0; + off_t frame_offset = offset; + off_t dest_offset = 0; + stripe_local_t *local = NULL; + call_frame_t *rframe = NULL; + 
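
The zero-fill branch in stripe_readv_fstat_cbk above is easier to see in isolation: a chunk that came back short is topped up with zeroes as long as the corrected file size says the read has not yet gone past EOF. The sketch below is a hedged reimplementation of just that rule; struct chunk_reply and reassemble() are invented names and the driver in main() is only an example.

#include <stdio.h>
#include <stddef.h>

struct chunk_reply {
    size_t requested;   /* bytes asked from this stripe chunk */
    size_t returned;    /* bytes the child actually returned  */
};

/* Returns the total byte count the reassembled readv reply would carry. */
static size_t
reassemble(struct chunk_reply *r, int nchunks, size_t read_offset,
           size_t file_size)
{
    size_t total = 0;

    for (int i = 0; i < nchunks; i++) {
        total += r[i].returned;
        if (r[i].returned < r[i].requested &&
            file_size > read_offset + total) {
            /* hole or sparse stripe: present the gap as zeroes */
            total += r[i].requested - r[i].returned;
        }
    }
    return total;
}

int
main(void)
{
    /* three 128 KiB chunks; the middle child returned nothing (a hole) */
    struct chunk_reply r[] = {
        {131072, 131072}, {131072, 0}, {131072, 131072}
    };

    printf("reply carries %zu bytes\n", reassemble(r, 3, 0, 3 * 131072));
    return 0;
}
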
stripe_local_t *rlocal = NULL; + stripe_fd_ctx_t *fctx = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + inode_ctx_get(fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EBADFD; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX(fctx, err); + + if (!stripe_size) { + gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); + goto err; + } + /* The file is stripe across the child nodes. Send the read request + * to the child nodes appropriately after checking which region of + * the file is in which child node. Always '0-' part of + * the file resides in the first child. + */ + rounded_start = floor(offset, stripe_size); + rounded_end = roof(offset + size, stripe_size); + num_stripe = (rounded_end - rounded_start) / stripe_size; + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + + /* This is where all the vectors should be copied. */ + local->replies = GF_CALLOC(num_stripe, sizeof(struct stripe_replies), + gf_stripe_mt_stripe_replies); + if (!local->replies) { + op_errno = ENOMEM; + goto err; + } + + off_index = (offset / stripe_size) % fctx->stripe_count; + local->wind_count = num_stripe; + local->readv_size = size; + local->offset = offset; + local->fd = fd_ref(fd); + local->fctx = fctx; + + for (index = off_index; index < (num_stripe + off_index); index++) { + rframe = copy_frame(frame); + rlocal = mem_get0(this->local_pool); + if (!rlocal) { + op_errno = ENOMEM; + goto err; } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - STRIPE_VALIDATE_FCTX (fctx, err); + frame_size = min(roof(frame_offset + 1, stripe_size), (offset + size)) - + frame_offset; - /* File has to be stripped across the child nodes */ - total_size = iov_length (vector, count); - remaining_size = total_size; + rlocal->node_index = index - off_index; + rlocal->orig_frame = frame; + rlocal->readv_size = frame_size; + rframe->local = rlocal; + idx = (index % fctx->stripe_count); - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { - gf_log (this->name, GF_LOG_DEBUG, - "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(frame_offset, stripe_size, + fctx->stripe_count); + else + dest_offset = frame_offset; + + STACK_WIND(rframe, stripe_readv_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->readv, fd, frame_size, + dest_offset, flags, xdata); + + frame_offset += frame_size; + } + + return 0; +err: + if (rframe) + STRIPE_STACK_DESTROY(rframe); + + STRIPE_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); + return 0; +} - rounded_start = floor(offset, stripe_size); - rounded_end = roof(offset + total_size, stripe_size); - total_chunks = (rounded_end - rounded_start) / stripe_size; - local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies), - gf_stripe_mt_stripe_replies); - if (!local->replies) { - op_errno = ENOMEM; - goto err; - } - - total_chunks = 0; - while (1) { - wframe = copy_frame(frame); - wlocal = mem_get0(this->local_pool); - if (!wlocal) { - op_errno = ENOMEM; - goto err; - } - wlocal->orig_frame = frame; - wframe->local = wlocal; - - /* Send striped chunk of the vector to child - 
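
Since the reformatted readv path above packs all of its offset arithmetic into a few dense lines, the following standalone sketch walks the same calculation for one example read. The names are illustrative, and coalesce mode is left out here; the packed-offset mapping was sketched after stripe_ftruncate above. It shows how the request is rounded out to stripe boundaries and carved into per-child sub-reads.

#include <stdio.h>
#include <sys/types.h>

static off_t floor_of(off_t v, off_t n) { return (v / n) * n; }
static off_t roof_of(off_t v, off_t n)  { return ((v + n - 1) / n) * n; }
static off_t min_of(off_t a, off_t b)   { return a < b ? a : b; }

int
main(void)
{
    off_t stripe_size = 128 * 1024;
    int   stripe_count = 4;
    off_t offset = 200 * 1024;          /* read 300 KiB at offset 200 KiB */
    off_t size = 300 * 1024;

    off_t rounded_start = floor_of(offset, stripe_size);
    off_t rounded_end = roof_of(offset + size, stripe_size);
    int   num_stripe = (rounded_end - rounded_start) / stripe_size;
    int   off_index = (offset / stripe_size) % stripe_count;

    off_t frame_offset = offset;
    for (int index = off_index; index < num_stripe + off_index; index++) {
        /* this sub-read ends at the next stripe boundary, or at the end
         * of the caller's range, whichever comes first */
        off_t frame_size = min_of(roof_of(frame_offset + 1, stripe_size),
                                  offset + size) - frame_offset;
        int idx = index % stripe_count;   /* child that holds this chunk */

        printf("child %d: read %lld bytes at file offset %lld\n", idx,
               (long long)frame_size, (long long)frame_offset);
        frame_offset += frame_size;
    }
    return 0;
}

With these numbers the read is split into three sub-reads of 56 KiB, 128 KiB and 116 KiB on children 1, 2 and 3, which together cover exactly the 300 KiB the caller asked for.
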
nodes appropriately. */ - idx = (((offset + offset_offset) / - local->stripe_size) % fctx->stripe_count); - - fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - tmp_count = iov_subset (vector, count, offset_offset, - offset_offset + fill_size, NULL); - tmp_vec = GF_CALLOC (tmp_count, sizeof (struct iovec), - gf_stripe_mt_iovec); - if (!tmp_vec) { - op_errno = ENOMEM; - goto err; +int32_t +stripe_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + struct stripe_replies *reply = NULL; + int32_t i = 0; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; + + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; + + mlocal->replies[local->node_index].op_ret = op_ret; + mlocal->replies[local->node_index].op_errno = op_errno; + + if (op_ret >= 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } + } + UNLOCK(&frame->lock); + + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + /* + * Only return the number of consecutively written bytes up until + * the first error. Only return an error if it occurs first. + * + * When a short write occurs, the application should retry at the + * appropriate offset, at which point we'll potentially pass back + * the error. + */ + for (i = 0, reply = mlocal->replies; i < mlocal->wind_count; + i++, reply++) { + if (reply->op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, + "reply %d " + "returned error %s", + i, strerror(reply->op_errno)); + if (!mlocal->op_ret) { + mlocal->op_ret = -1; + mlocal->op_errno = reply->op_errno; } - tmp_count = iov_subset (vector, count, offset_offset, - offset_offset + fill_size, tmp_vec); - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - /* - * Store off the request index (with respect to the chunk of the - * initial offset) and the size of the request. This is required - * in the callback to calculate an appropriate return value in - * the event of a write failure in one or more requests. 
- */ - wlocal->node_index = total_chunks; - local->replies[total_chunks].requested_size = fill_size; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, - local->stripe_size, fctx->stripe_count); - - STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->writev, fd, tmp_vec, - tmp_count, dest_offset, flags, iobref, - xdata); + break; + } - GF_FREE (tmp_vec); - offset_offset += fill_size; - total_chunks++; - if (remaining_size == 0) - break; + mlocal->op_ret += reply->op_ret; + + if (reply->op_ret < reply->requested_size) + break; } - return 0; -err: - if (wframe) - STRIPE_STACK_DESTROY(wframe); + GF_FREE(mlocal->replies); - STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(writev, mframe, mlocal->op_ret, mlocal->op_errno, + &mlocal->pre_buf, &mlocal->post_buf, NULL); + } +out: + if (frame) + STRIPE_STACK_DESTROY(frame); + return 0; } +int32_t +stripe_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + struct iovec *tmp_vec = NULL; + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t total_size = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + int32_t tmp_count = count; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + off_t rounded_start = 0; + off_t rounded_end = 0; + int32_t total_chunks = 0; + call_frame_t *wframe = NULL; + stripe_local_t *wlocal = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + inode_ctx_get(fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX(fctx, err); + + /* File has to be stripped across the child nodes */ + total_size = iov_length(vector, count); + remaining_size = total_size; + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + rounded_start = floor(offset, stripe_size); + rounded_end = roof(offset + total_size, stripe_size); + total_chunks = (rounded_end - rounded_start) / stripe_size; + local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies), + gf_stripe_mt_stripe_replies); + if (!local->replies) { + op_errno = ENOMEM; + goto err; + } + + total_chunks = 0; + while (1) { + wframe = copy_frame(frame); + wlocal = mem_get0(this->local_pool); + if (!wlocal) { + op_errno = ENOMEM; + goto err; + } + wlocal->orig_frame = frame; + wframe->local = wlocal; + + /* Send striped chunk of the vector to child + nodes appropriately. 
*/ + idx = (((offset + offset_offset) / local->stripe_size) % + fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + tmp_count = iov_subset(vector, count, offset_offset, + offset_offset + fill_size, NULL); + tmp_vec = GF_CALLOC(tmp_count, sizeof(struct iovec), + gf_stripe_mt_iovec); + if (!tmp_vec) { + op_errno = ENOMEM; + goto err; + } + tmp_count = iov_subset(vector, count, offset_offset, + offset_offset + fill_size, tmp_vec); + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + /* + * Store off the request index (with respect to the chunk of the + * initial offset) and the size of the request. This is required + * in the callback to calculate an appropriate return value in + * the event of a write failure in one or more requests. + */ + wlocal->node_index = total_chunks; + local->replies[total_chunks].requested_size = fill_size; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, local->stripe_size, + fctx->stripe_count); + + STACK_WIND(wframe, stripe_writev_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->writev, fd, tmp_vec, tmp_count, + dest_offset, flags, iobref, xdata); + + GF_FREE(tmp_vec); + offset_offset += fill_size; + total_chunks++; + if (remaining_size == 0) + break; + } + + return 0; +err: + if (wframe) + STRIPE_STACK_DESTROY(wframe); + + STRIPE_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} int32_t stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; - if (op_ret == 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = postbuf->ia_size; - } + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = 
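
The comment block in stripe_writev_cbk above ("only return the number of consecutively written bytes up until the first error") is worth a concrete rendering. The sketch below reimplements just that accounting rule under invented names (struct chunk_result, writev_return); it is an illustration of the documented behaviour, not the xlator's code.

#include <stdio.h>
#include <errno.h>
#include <string.h>

struct chunk_result {
    int op_ret;          /* bytes written by this chunk, or -1 on failure */
    int op_errno;
    int requested_size;  /* bytes this chunk was asked to write */
};

/* Returns the byte count to report to the application; sets *errp only
 * when the failure happened before any byte was written. */
static int
writev_return(struct chunk_result *r, int nchunks, int *errp)
{
    int done = 0;

    *errp = 0;
    for (int i = 0; i < nchunks; i++) {
        if (r[i].op_ret == -1) {
            if (done == 0)
                *errp = r[i].op_errno;   /* error hit the very first byte */
            break;
        }
        done += r[i].op_ret;
        if (r[i].op_ret < r[i].requested_size)
            break;                        /* short write: report progress */
    }
    return *errp ? -1 : done;
}

int
main(void)
{
    /* full first chunk, short second chunk, failed third chunk */
    struct chunk_result r[] = {
        {65536, 0, 65536}, {16384, 0, 65536}, {-1, ENOSPC, 65536}
    };
    int err = 0;
    int ret = writev_return(r, 3, &err);

    printf("ret=%d errno=%s\n", ret, err ? strerror(err) : "-");
    return 0;
}

Here the application is told 81920 bytes were written and sees no error; it is expected to retry from that offset, at which point the ENOSPC would surface.
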
postbuf->ia_size; + } - /* return the first failure */ - if (mlocal->op_ret == 0) { - mlocal->op_ret = op_ret; - mlocal->op_errno = op_errno; - } + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret, - mlocal->op_errno, &mlocal->pre_buf, - &mlocal->post_buf, NULL); - } + STRIPE_STACK_UNWIND(fallocate, mframe, mlocal->op_ret, mlocal->op_errno, + &mlocal->pre_buf, &mlocal->post_buf, NULL); + } out: - if (frame) - STRIPE_STACK_DESTROY(frame); - return 0; + if (frame) + STRIPE_STACK_DESTROY(frame); + return 0; } int32_t stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, - off_t offset, size_t len, dict_t *xdata) + off_t offset, size_t len, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - call_frame_t *fframe = NULL; - stripe_local_t *flocal = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - inode_ctx_get (fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX (fctx, err); - - remaining_size = len; - - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { - gf_log (this->name, GF_LOG_DEBUG, - "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + inode_ctx_get(fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX(fctx, err); + + remaining_size = len; + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = 
ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + /* send fallocate request to the associated child node */ + idx = (((offset + offset_offset) / local->stripe_size) % + fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, local->stripe_size, + fctx->stripe_count); + + /* + * TODO: Create a separate handler for coalesce mode that sends a + * single fallocate per-child (since the ranges are linear). + */ + STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->fallocate, fd, mode, dest_offset, + fill_size, xdata); - while (1) { - fframe = copy_frame(frame); - flocal = mem_get0(this->local_pool); - if (!flocal) { - op_errno = ENOMEM; - goto err; - } - flocal->orig_frame = frame; - fframe->local = flocal; - - /* send fallocate request to the associated child node */ - idx = (((offset + offset_offset) / - local->stripe_size) % fctx->stripe_count); - - fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, - local->stripe_size, fctx->stripe_count); - - /* - * TODO: Create a separate handler for coalesce mode that sends a - * single fallocate per-child (since the ranges are linear). 
- */ - STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->fallocate, fd, mode, - dest_offset, fill_size, xdata); - - offset_offset += fill_size; - if (remaining_size == 0) - break; - } + offset_offset += fill_size; + if (remaining_size == 0) + break; + } - return 0; + return 0; err: - if (fframe) - STRIPE_STACK_DESTROY(fframe); + if (fframe) + STRIPE_STACK_DESTROY(fframe); - STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - int32_t stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; - if (op_ret == 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = postbuf->ia_size; - } + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } - /* return the first failure */ - if (mlocal->op_ret == 0) { - mlocal->op_ret = op_ret; - mlocal->op_errno = op_errno; - } + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret, - mlocal->op_errno, &mlocal->pre_buf, - &mlocal->post_buf, NULL); - } + STRIPE_STACK_UNWIND(discard, mframe, mlocal->op_ret, 
mlocal->op_errno, + &mlocal->pre_buf, &mlocal->post_buf, NULL); + } out: - if (frame) - STRIPE_STACK_DESTROY(frame); + if (frame) + STRIPE_STACK_DESTROY(frame); - return 0; + return 0; } int32_t stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) + size_t len, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - call_frame_t *fframe = NULL; - stripe_local_t *flocal = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - inode_ctx_get (fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX (fctx, err); - - remaining_size = len; - - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { - gf_log (this->name, GF_LOG_DEBUG, - "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + inode_ctx_get(fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX(fctx, err); + + remaining_size = len; + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + /* send discard request to the associated child node */ + idx = (((offset + offset_offset) / local->stripe_size) % + fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, local->stripe_size, + fctx->stripe_count); + + /* + * TODO: Create a separate handler for coalesce mode that sends a + * single discard per-child (since the ranges are linear). 
+ */ + STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->discard, fd, dest_offset, + fill_size, xdata); - while (1) { - fframe = copy_frame(frame); - flocal = mem_get0(this->local_pool); - if (!flocal) { - op_errno = ENOMEM; - goto err; - } - flocal->orig_frame = frame; - fframe->local = flocal; - - /* send discard request to the associated child node */ - idx = (((offset + offset_offset) / - local->stripe_size) % fctx->stripe_count); - - fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, - local->stripe_size, fctx->stripe_count); - - /* - * TODO: Create a separate handler for coalesce mode that sends a - * single discard per-child (since the ranges are linear). - */ - STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->discard, fd, dest_offset, - fill_size, xdata); - - offset_offset += fill_size; - if (remaining_size == 0) - break; - } + offset_offset += fill_size; + if (remaining_size == 0) + break; + } - return 0; + return 0; err: - if (fframe) - STRIPE_STACK_DESTROY(fframe); + if (fframe) + STRIPE_STACK_DESTROY(fframe); - STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int32_t stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; - GF_ASSERT (frame); + GF_ASSERT(frame); - if (!this || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } + if (!this || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; - if (op_ret == 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = postbuf->ia_size; - } + if (mlocal->prebuf_size < prebuf->ia_size) + 
mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } - /* return the first failure */ - if (mlocal->op_ret == 0) { - mlocal->op_ret = op_ret; - mlocal->op_errno = op_errno; - } + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret, - mlocal->op_errno, &mlocal->pre_buf, - &mlocal->post_buf, NULL); - } + STRIPE_STACK_UNWIND(zerofill, mframe, mlocal->op_ret, mlocal->op_errno, + &mlocal->pre_buf, &mlocal->post_buf, NULL); + } out: - STRIPE_STACK_DESTROY(frame); - return 0; + STRIPE_STACK_DESTROY(frame); + return 0; } int32_t stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) + off_t len, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - call_frame_t *fframe = NULL; - stripe_local_t *flocal = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); - - inode_ctx_get (fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX (fctx, err); - - remaining_size = len; - - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { - gf_log (this->name, GF_LOG_DEBUG, - "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } - - while (1) { - fframe = copy_frame(frame); - flocal = mem_get0(this->local_pool); - if (!flocal) { - op_errno = ENOMEM; - goto err; - } - flocal->orig_frame = frame; - fframe->local = flocal; - - idx = (((offset + offset_offset) / - local->stripe_size) % fctx->stripe_count); - - fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, - local->stripe_size, - fctx->stripe_count); - - STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->zerofill, fd, - dest_offset, fill_size, xdata); - offset_offset += fill_size; - if (remaining_size == 0) - break; - } - - return 0; + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t 
remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + VALIDATE_OR_GOTO(fd->inode, err); + + inode_ctx_get(fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX(fctx, err); + + remaining_size = len; + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + idx = (((offset + offset_offset) / local->stripe_size) % + fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, local->stripe_size, + fctx->stripe_count); + + STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->zerofill, fd, dest_offset, + fill_size, xdata); + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; err: - if (fframe) - STRIPE_STACK_DESTROY(fframe); + if (fframe) + STRIPE_STACK_DESTROY(fframe); - STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } int32_t -stripe_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - gf_seek_what_t what, dict_t *xdata) +stripe_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { - /* TBD */ - gf_log (this->name, GF_LOG_INFO, "seek called on %s.", - uuid_utoa (fd->inode->gfid)); - STRIPE_STACK_UNWIND (seek, frame, -1, ENOTSUP, 0, NULL); - return 0; + /* TBD */ + gf_log(this->name, GF_LOG_INFO, "seek called on %s.", + uuid_utoa(fd->inode->gfid)); + STRIPE_STACK_UNWIND(seek, frame, -1, ENOTSUP, 0, NULL); + return 0; } int32_t -stripe_release (xlator_t *this, fd_t *fd) +stripe_release(xlator_t *this, fd_t *fd) { - return 0; + return 0; } int -stripe_forget (xlator_t *this, inode_t *inode) +stripe_forget(xlator_t *this, inode_t *inode) { - uint64_t tmp_fctx = 0; - stripe_fd_ctx_t *fctx = NULL; + uint64_t tmp_fctx = 0; + stripe_fd_ctx_t *fctx = NULL; - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (inode, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(inode, err); - (void) inode_ctx_del (inode, this, &tmp_fctx); - if (!tmp_fctx) { - goto err; - } + (void)inode_ctx_del(inode, this, &tmp_fctx); + if (!tmp_fctx) { + goto err; + } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - if (!fctx->static_array) - GF_FREE (fctx->xl_array); + if (!fctx->static_array) + GF_FREE(fctx->xl_array); - GF_FREE (fctx); + GF_FREE(fctx); err: - return 0; + return 0; } int32_t -notify 
(xlator_t *this, int32_t event, void *data, ...) +notify(xlator_t *this, int32_t event, void *data, ...) { - stripe_private_t *priv = NULL; - int down_client = 0; - int i = 0; - gf_boolean_t heard_from_all_children = _gf_false; - - if (!this) - return 0; - - priv = this->private; - if (!priv) - return 0; - - switch (event) - { - case GF_EVENT_CHILD_UP: - { - /* get an index number to set */ - for (i = 0; i < priv->child_count; i++) { - if (data == priv->xl_array[i]) - break; - } - - if (priv->child_count == i) { - gf_log (this->name, GF_LOG_ERROR, - "got GF_EVENT_CHILD_UP bad subvolume %s", - data? ((xlator_t *)data)->name: NULL); - break; - } - - LOCK (&priv->lock); - { - if (data == FIRST_CHILD (this)) - priv->first_child_down = 0; - priv->last_event[i] = event; - } - UNLOCK (&priv->lock); - } - break; - case GF_EVENT_CHILD_CONNECTING: - { - // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing - goto out; - } - case GF_EVENT_CHILD_DOWN: - { - /* get an index number to set */ - for (i = 0; i < priv->child_count; i++) { - if (data == priv->xl_array[i]) - break; - } - - if (priv->child_count == i) { - gf_log (this->name, GF_LOG_ERROR, - "got GF_EVENT_CHILD_DOWN bad subvolume %s", - data? ((xlator_t *)data)->name: NULL); - break; - } - - LOCK (&priv->lock); - { - if (data == FIRST_CHILD (this)) - priv->first_child_down = 1; - priv->last_event[i] = event; - } - UNLOCK (&priv->lock); - } - break; + stripe_private_t *priv = NULL; + int down_client = 0; + int i = 0; + gf_boolean_t heard_from_all_children = _gf_false; - default: - { - /* */ - default_notify (this, event, data); - goto out; - } - break; - } + if (!this) + return 0; - // Consider child as down if it's last_event is not CHILD_UP - for (i = 0, down_client = 0; i < priv->child_count; i++) - if (priv->last_event[i] != GF_EVENT_CHILD_UP) - down_client++; + priv = this->private; + if (!priv) + return 0; - LOCK (&priv->lock); - { - priv->nodes_down = down_client; + switch (event) { + case GF_EVENT_CHILD_UP: { + /* get an index number to set */ + for (i = 0; i < priv->child_count; i++) { + if (data == priv->xl_array[i]) + break; + } + + if (priv->child_count == i) { + gf_log(this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_UP bad subvolume %s", + data ? ((xlator_t *)data)->name : NULL); + break; + } + + LOCK(&priv->lock); + { + if (data == FIRST_CHILD(this)) + priv->first_child_down = 0; + priv->last_event[i] = event; + } + UNLOCK(&priv->lock); + } break; + case GF_EVENT_CHILD_CONNECTING: { + // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing + goto out; } - UNLOCK (&priv->lock); - - heard_from_all_children = _gf_true; - for (i = 0; i < priv->child_count; i++) - if (!priv->last_event[i]) - heard_from_all_children = _gf_false; - - if (heard_from_all_children) - default_notify (this, event, data); + case GF_EVENT_CHILD_DOWN: { + /* get an index number to set */ + for (i = 0; i < priv->child_count; i++) { + if (data == priv->xl_array[i]) + break; + } + + if (priv->child_count == i) { + gf_log(this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_DOWN bad subvolume %s", + data ? 
((xlator_t *)data)->name : NULL); + break; + } + + LOCK(&priv->lock); + { + if (data == FIRST_CHILD(this)) + priv->first_child_down = 1; + priv->last_event[i] = event; + } + UNLOCK(&priv->lock); + } break; + + default: { + /* */ + default_notify(this, event, data); + goto out; + } break; + } + + // Consider child as down if it's last_event is not CHILD_UP + for (i = 0, down_client = 0; i < priv->child_count; i++) + if (priv->last_event[i] != GF_EVENT_CHILD_UP) + down_client++; + + LOCK(&priv->lock); + { + priv->nodes_down = down_client; + } + UNLOCK(&priv->lock); + + heard_from_all_children = _gf_true; + for (i = 0; i < priv->child_count; i++) + if (!priv->last_event[i]) + heard_from_all_children = _gf_false; + + if (heard_from_all_children) + default_notify(this, event, data); out: - return 0; + return 0; } int -stripe_setxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, dict_t *xdata) +stripe_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - int ret = -1; - int call_cnt = 0; - stripe_local_t *local = NULL; + int ret = -1; + int call_cnt = 0; + stripe_local_t *local = NULL; - if (!frame || !frame->local || !this) { - gf_log ("", GF_LOG_ERROR, "Possible NULL deref"); - return ret; - } + if (!frame || !frame->local || !this) { + gf_log("", GF_LOG_ERROR, "Possible NULL deref"); + return ret; + } - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - call_cnt = --local->wind_count; - - /** - * We overwrite ->op_* values here for subsequent failure - * conditions, hence we propagate the last errno down the - * stack. - */ - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unlock; - } + LOCK(&frame->lock); + { + call_cnt = --local->wind_count; + + /** + * We overwrite ->op_* values here for subsequent failure + * conditions, hence we propagate the last errno down the + * stack. 
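
As a hedged illustration of the bookkeeping notify() above performs, the sketch below tracks a last_event slot per child, recomputes the number of down children after every event, and only reports "heard from all children" once every slot has been filled. All names here (stripe_state, record_child_event, EV_*) are made up for the example.

#include <stdio.h>

enum child_event { EV_NONE = 0, EV_CHILD_UP, EV_CHILD_DOWN };

struct stripe_state {
    int child_count;
    enum child_event last_event[8];
    int nodes_down;
};

/* Returns 1 when the event should be propagated to the parent xlator. */
static int
record_child_event(struct stripe_state *s, int child, enum child_event ev)
{
    int i;

    s->last_event[child] = ev;

    s->nodes_down = 0;
    for (i = 0; i < s->child_count; i++)
        if (s->last_event[i] != EV_CHILD_UP)
            s->nodes_down++;

    for (i = 0; i < s->child_count; i++)
        if (s->last_event[i] == EV_NONE)
            return 0;           /* not yet heard from every child */
    return 1;
}

int
main(void)
{
    struct stripe_state s = {.child_count = 3};

    printf("%d\n", record_child_event(&s, 0, EV_CHILD_UP));   /* 0 */
    printf("%d\n", record_child_event(&s, 1, EV_CHILD_DOWN)); /* 0 */
    printf("%d\n", record_child_event(&s, 2, EV_CHILD_UP));   /* 1 */
    printf("nodes_down=%d\n", s.nodes_down);                  /* 1 */
    return 0;
}
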
+ */ + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unlock; } + } - unlock: - UNLOCK (&frame->lock); +unlock: + UNLOCK(&frame->lock); - if (!call_cnt) { - STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, xdata); - } + if (!call_cnt) { + STRIPE_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); + } - return 0; + return 0; } #ifdef HAVE_BD_XLATOR int -stripe_is_bd (dict_t *this, char *key, data_t *value, void *data) +stripe_is_bd(dict_t *this, char *key, data_t *value, void *data) { - gf_boolean_t *is_bd = data; + gf_boolean_t *is_bd = data; - if (data == NULL) - return 0; + if (data == NULL) + return 0; - if (XATTR_IS_BD (key)) - *is_bd = _gf_true; + if (XATTR_IS_BD(key)) + *is_bd = _gf_true; - return 0; + return 0; } static gf_boolean_t -stripe_setxattr_is_bd (dict_t *dict) +stripe_setxattr_is_bd(dict_t *dict) { - gf_boolean_t is_bd = _gf_false; + gf_boolean_t is_bd = _gf_false; - if (dict == NULL) - goto out; + if (dict == NULL) + goto out; - dict_foreach (dict, stripe_is_bd, &is_bd); + dict_foreach(dict, stripe_is_bd, &is_bd); out: - return is_bd; + return is_bd; } #else #define stripe_setxattr_is_bd(dict) _gf_false #endif int -stripe_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags, dict_t *xdata) +stripe_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata) { - int32_t op_errno = EINVAL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - int i = 0; - gf_boolean_t is_bd = _gf_false; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - - GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict, - op_errno, err); - - priv = this->private; - trav = this->children; - - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - frame->local = local; - local->wind_count = priv->child_count; - local->op_ret = local->op_errno = 0; - - is_bd = stripe_setxattr_is_bd (dict); - - /** - * Set xattrs for directories on all subvolumes. Additionally - * this power is only given to a special client. Bd xlator - * also needs xattrs for regular files (ie LVs) - */ - if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) && - IA_ISDIR (loc->inode->ia_type)) || is_bd) { - for (i = 0; i < priv->child_count; i++, trav = trav->next) { - STACK_WIND (frame, stripe_setxattr_cbk, - trav->xlator, trav->xlator->fops->setxattr, - loc, dict, flags, xdata); - } - } else { - local->wind_count = 1; - STACK_WIND (frame, stripe_setxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); - } - - return 0; + int32_t op_errno = EINVAL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + int i = 0; + gf_boolean_t is_bd = _gf_false; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + + GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err); + + priv = this->private; + trav = this->children; + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + local->wind_count = priv->child_count; + local->op_ret = local->op_errno = 0; + + is_bd = stripe_setxattr_is_bd(dict); + + /** + * Set xattrs for directories on all subvolumes. 
Additionally + * this power is only given to a special client. Bd xlator + * also needs xattrs for regular files (ie LVs) + */ + if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) && + IA_ISDIR(loc->inode->ia_type)) || + is_bd) { + for (i = 0; i < priv->child_count; i++, trav = trav->next) { + STACK_WIND(frame, stripe_setxattr_cbk, trav->xlator, + trav->xlator->fops->setxattr, loc, dict, flags, xdata); + } + } else { + local->wind_count = 1; + STACK_WIND(frame, stripe_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + } + + return 0; err: - STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); - return 0; + STRIPE_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + return 0; } - int -stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, dict_t *xdata) +stripe_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); - return 0; + STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); + return 0; } - int -stripe_is_special_key (dict_t *this, - char *key, - data_t *value, - void *data) +stripe_is_special_key(dict_t *this, char *key, data_t *value, void *data) { - gf_boolean_t *is_special = NULL; + gf_boolean_t *is_special = NULL; - if (data == NULL) { - goto out; - } + if (data == NULL) { + goto out; + } - is_special = data; + is_special = data; - if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key)) - *is_special = _gf_true; + if (XATTR_IS_LOCKINFO(key) || XATTR_IS_BD(key)) + *is_special = _gf_true; out: - return 0; + return 0; } int32_t -stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xdata) +stripe_fsetxattr_everyone_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int call_count = 0; - stripe_local_t *local = NULL; + int call_count = 0; + stripe_local_t *local = NULL; - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - call_count = --local->wind_count; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + call_count = --local->wind_count; - if (call_count == 0) { - STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret, - local->op_errno, NULL); + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; } - return 0; + } + UNLOCK(&frame->lock); + + if (call_count == 0) { + STRIPE_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + NULL); + } + return 0; } int -stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int flags, dict_t *xdata) +stripe_fsetxattr_to_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) { - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int ret = -1; - stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int ret = -1; + stripe_local_t *local = NULL; - priv = this->private; + priv = this->private; - local = mem_get0 (this->local_pool); - if (local == NULL) { - goto out; - } + local = mem_get0(this->local_pool); + if (local == NULL) { + goto out; + } - frame->local = local; + frame->local = local; - local->wind_count = priv->child_count; + local->wind_count = priv->child_count; - trav = this->children; + trav = this->children; - while 
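
The fan-out rule that stripe_setxattr() above implements can be summarised in a few lines. In the sketch below, PID_GSYNCD is a stand-in for GF_CLIENT_PID_GSYNCD (its actual value is not assumed here) and setxattr_fanout() is an invented helper that only returns how many subvolumes would be wound to.

#include <stdio.h>

#define PID_GSYNCD (-10)   /* stand-in for GF_CLIENT_PID_GSYNCD */

static int
setxattr_fanout(int client_pid, int is_dir, int is_bd_xattr, int child_count)
{
    /* geo-replication's special client on a directory, or BD xattrs on a
     * regular file, go to every child; everything else to the first only */
    if ((client_pid == PID_GSYNCD && is_dir) || is_bd_xattr)
        return child_count;
    return 1;
}

int
main(void)
{
    printf("gsyncd on dir -> %d subvols\n", setxattr_fanout(PID_GSYNCD, 1, 0, 4));
    printf("normal client -> %d subvols\n", setxattr_fanout(1234, 0, 0, 4));
    printf("bd xattr      -> %d subvols\n", setxattr_fanout(1234, 0, 1, 4));
    return 0;
}
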
(trav) { - STACK_WIND (frame, stripe_fsetxattr_everyone_cbk, - trav->xlator, trav->xlator->fops->fsetxattr, - fd, dict, flags, xdata); - trav = trav->next; - } + while (trav) { + STACK_WIND(frame, stripe_fsetxattr_everyone_cbk, trav->xlator, + trav->xlator->fops->fsetxattr, fd, dict, flags, xdata); + trav = trav->next; + } - ret = 0; + ret = 0; out: - return ret; + return ret; } static gf_boolean_t -stripe_fsetxattr_is_special (dict_t *dict) +stripe_fsetxattr_is_special(dict_t *dict) { - gf_boolean_t is_spl = _gf_false; + gf_boolean_t is_spl = _gf_false; - if (dict == NULL) { - goto out; - } + if (dict == NULL) { + goto out; + } - dict_foreach (dict, stripe_is_special_key, &is_spl); + dict_foreach(dict, stripe_is_special_key, &is_spl); out: - return is_spl; + return is_spl; } int -stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int flags, dict_t *xdata) +stripe_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int flags, dict_t *xdata) { - int32_t op_ret = -1, ret = -1, op_errno = EINVAL; - gf_boolean_t is_spl = _gf_false; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict, - op_errno, err); - - is_spl = stripe_fsetxattr_is_special (dict); - if (is_spl) { - ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict, - flags, xdata); - if (ret < 0) { - op_errno = ENOMEM; - goto err; - } + int32_t op_ret = -1, ret = -1, op_errno = EINVAL; + gf_boolean_t is_spl = _gf_false; - goto out; + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err); + + is_spl = stripe_fsetxattr_is_special(dict); + if (is_spl) { + ret = stripe_fsetxattr_to_everyone(frame, this, fd, dict, flags, xdata); + if (ret < 0) { + op_errno = ENOMEM; + goto err; } - STACK_WIND (frame, stripe_fsetxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - fd, dict, flags, xdata); + goto out; + } + + STACK_WIND(frame, stripe_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); out: - return 0; + return 0; err: - STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); - return 0; + STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); + return 0; } int -stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +stripe_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); - return 0; + STRIPE_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); + return 0; } int -stripe_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +stripe_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int32_t op_errno = EINVAL; + int32_t op_errno = EINVAL; - VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO(this, err); - GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", - name, op_errno, err); + GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err); - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(loc, err); - STACK_WIND (frame, stripe_removexattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - loc, name, xdata); - return 0; + 
STACK_WIND(frame, stripe_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; err: - STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - return 0; + STRIPE_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); + return 0; } - int -stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +stripe_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); - return 0; + STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); + return 0; } int -stripe_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) +stripe_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = EINVAL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", - name, op_errno, err); + GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err); - STACK_WIND (frame, stripe_fremovexattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, - fd, name, xdata); - return 0; - err: - STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); - return 0; + STACK_WIND(frame, stripe_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; +err: + STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); + return 0; } int32_t -stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, - dict_t *xattr, struct iatt *parent) +stripe_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *parent) { - stripe_local_t *local = NULL; - call_frame_t *main_frame = NULL; - stripe_local_t *main_local = NULL; - gf_dirent_t *entry = NULL; - call_frame_t *prev = NULL; - int done = 0; - - local = frame->local; - prev = cookie; - - entry = local->dirent; - - main_frame = local->orig_frame; - main_local = main_frame->local; - LOCK (&frame->lock); - { - - local->call_count--; - if (!local->call_count) - done = 1; - if (op_ret == -1) { - local->op_errno = op_errno; - local->op_ret = op_ret; - goto unlock; - } + stripe_local_t *local = NULL; + call_frame_t *main_frame = NULL; + stripe_local_t *main_local = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + int done = 0; + + local = frame->local; + prev = cookie; + + entry = local->dirent; + + main_frame = local->orig_frame; + main_local = main_frame->local; + LOCK(&frame->lock); + { + local->call_count--; + if (!local->call_count) + done = 1; + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = op_ret; + goto unlock; + } - if (stripe_ctx_handle(this, prev, local, xattr)) - gf_log(this->name, GF_LOG_ERROR, - "Error getting fctx info from dict."); + if (stripe_ctx_handle(this, prev, local, xattr)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict."); - correct_file_size(stbuf, local->fctx, prev); + correct_file_size(stbuf, local->fctx, prev); - 
stripe_iatt_merge (stbuf, &entry->d_stat); - local->stbuf_blocks += stbuf->ia_blocks; - } + stripe_iatt_merge(stbuf, &entry->d_stat); + local->stbuf_blocks += stbuf->ia_blocks; + } unlock: - UNLOCK(&frame->lock); + UNLOCK(&frame->lock); + if (done) { + inode_ctx_put(entry->inode, this, (uint64_t)(long)local->fctx); + + done = 0; + LOCK(&main_frame->lock); + { + main_local->wind_count--; + if (!main_local->wind_count) + done = 1; + if (local->op_ret == -1) { + main_local->op_errno = local->op_errno; + main_local->op_ret = local->op_ret; + } + entry->d_stat.ia_blocks = local->stbuf_blocks; + } + UNLOCK(&main_frame->lock); if (done) { - inode_ctx_put (entry->inode, this, - (uint64_t) (long)local->fctx); - - done = 0; - LOCK (&main_frame->lock); - { - main_local->wind_count--; - if (!main_local->wind_count) - done = 1; - if (local->op_ret == -1) { - main_local->op_errno = local->op_errno; - main_local->op_ret = local->op_ret; - } - entry->d_stat.ia_blocks = local->stbuf_blocks; - } - UNLOCK (&main_frame->lock); - if (done) { - main_frame->local = NULL; - STRIPE_STACK_UNWIND (readdir, main_frame, - main_local->op_ret, - main_local->op_errno, - &main_local->entries, NULL); - gf_dirent_free (&main_local->entries); - stripe_local_wipe (main_local); - mem_put (main_local); - } - frame->local = NULL; - stripe_local_wipe (local); - mem_put (local); - STRIPE_STACK_DESTROY (frame); - } + main_frame->local = NULL; + STRIPE_STACK_UNWIND(readdir, main_frame, main_local->op_ret, + main_local->op_errno, &main_local->entries, + NULL); + gf_dirent_free(&main_local->entries); + stripe_local_wipe(main_local); + mem_put(main_local); + } + frame->local = NULL; + stripe_local_wipe(local); + mem_put(local); + STRIPE_STACK_DESTROY(frame); + } - return 0; + return 0; } int32_t -stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *orig_entries, dict_t *xdata) +stripe_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, + dict_t *xdata) { - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - gf_dirent_t *local_entry = NULL; - gf_dirent_t *tmp_entry = NULL; - xlator_list_t *trav = NULL; - loc_t loc = {0, }; - int32_t count = 0; - stripe_private_t *priv = NULL; - int32_t subvols = 0; - dict_t *xattrs = NULL; - call_frame_t *local_frame = NULL; - stripe_local_t *local_ent = NULL; - - if (!this || !frame->local || !cookie) { - gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - prev = cookie; - local = frame->local; - trav = this->children; - priv = this->private; - - subvols = priv->child_count; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + gf_dirent_t *local_entry = NULL; + gf_dirent_t *tmp_entry = NULL; + xlator_list_t *trav = NULL; + loc_t loc = { + 0, + }; + int32_t count = 0; + stripe_private_t *priv = NULL; + int32_t subvols = 0; + dict_t *xattrs = NULL; + call_frame_t *local_frame = NULL; + stripe_local_t *local_ent = NULL; + + if (!this || !frame->local || !cookie) { + gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; + local = frame->local; + trav = this->children; + priv = this->private; - LOCK (&frame->lock); - { - local->op_errno = op_errno; - local->op_ret = op_ret; + subvols = priv->child_count; - if (op_ret != -1) { - list_splice_init (&orig_entries->list, - &local->entries.list); - local->wind_count = op_ret; - } + LOCK(&frame->lock); + { + local->op_errno = op_errno; + 
local->op_ret = op_ret; + if (op_ret != -1) { + list_splice_init(&orig_entries->list, &local->entries.list); + local->wind_count = op_ret; } - UNLOCK (&frame->lock); + } + UNLOCK(&frame->lock); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "%s returned error %s", - prev->this->name, strerror (op_errno)); - goto out; + if (op_ret == -1) { + gf_log(this->name, GF_LOG_WARNING, "%s returned error %s", + prev->this->name, strerror(op_errno)); + goto out; + } + + xattrs = dict_new(); + if (xattrs) + (void)stripe_xattr_request_build(this, xattrs, 0, 0, 0, 0); + count = op_ret; + list_for_each_entry_safe(local_entry, tmp_entry, (&local->entries.list), + list) + { + if (!local_entry) + break; + if (!IA_ISREG(local_entry->d_stat.ia_type) || !local_entry->inode) { + LOCK(&frame->lock); + { + local->wind_count--; + count = local->wind_count; + } + UNLOCK(&frame->lock); + continue; + } + + local_frame = copy_frame(frame); + + if (!local_frame) { + op_errno = ENOMEM; + op_ret = -1; + goto out; } - xattrs = dict_new (); - if (xattrs) - (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0); - count = op_ret; - list_for_each_entry_safe (local_entry, tmp_entry, - (&local->entries.list), list) { - - if (!local_entry) - break; - if (!IA_ISREG (local_entry->d_stat.ia_type) || !local_entry->inode) { - LOCK (&frame->lock); - { - local->wind_count--; - count = local->wind_count; - } - UNLOCK (&frame->lock); - continue; - } - - local_frame = copy_frame (frame); - - if (!local_frame) { - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - - local_ent = mem_get0 (this->local_pool); - if (!local_ent) { - op_errno = ENOMEM; - op_ret = -1; - goto out; - } + local_ent = mem_get0(this->local_pool); + if (!local_ent) { + op_errno = ENOMEM; + op_ret = -1; + goto out; + } - loc.inode = inode_ref (local_entry->inode); + loc.inode = inode_ref(local_entry->inode); - gf_uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid); + gf_uuid_copy(loc.gfid, local_entry->d_stat.ia_gfid); - local_ent->orig_frame = frame; + local_ent->orig_frame = frame; - local_ent->call_count = subvols; + local_ent->call_count = subvols; - local_ent->dirent = local_entry; + local_ent->dirent = local_entry; - local_frame->local = local_ent; + local_frame->local = local_ent; - trav = this->children; - while (trav) { - STACK_WIND (local_frame, stripe_readdirp_lookup_cbk, - trav->xlator, trav->xlator->fops->lookup, - &loc, xattrs); - trav = trav->next; - } - loc_wipe (&loc); + trav = this->children; + while (trav) { + STACK_WIND(local_frame, stripe_readdirp_lookup_cbk, trav->xlator, + trav->xlator->fops->lookup, &loc, xattrs); + trav = trav->next; } + loc_wipe(&loc); + } out: - if (!count) { - /* all entries are directories */ - frame->local = NULL; - STRIPE_STACK_UNWIND (readdir, frame, - (local ? local->op_ret : -1), - (local ? local->op_errno : EINVAL), - (local ? &local->entries : NULL), - NULL); - gf_dirent_free (&local->entries); - stripe_local_wipe (local); - mem_put (local); - } - if (xattrs) - dict_unref (xattrs); - return 0; - + if (!count) { + /* all entries are directories */ + frame->local = NULL; + STRIPE_STACK_UNWIND(readdir, frame, (local ? local->op_ret : -1), + (local ? local->op_errno : EINVAL), + (local ? 
&local->entries : NULL), NULL); + gf_dirent_free(&local->entries); + stripe_local_wipe(local); + mem_put(local); + } + if (xattrs) + dict_unref(xattrs); + return 0; } int32_t -stripe_readdirp (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *xdata) +stripe_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int op_errno = -1; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int op_errno = -1; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); - priv = this->private; - trav = this->children; + priv = this->private; + trav = this->children; - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } - frame->local = local; + frame->local = local; - local->fd = fd_ref (fd); + local->fd = fd_ref(fd); - local->wind_count = 0; + local->wind_count = 0; - local->count = 0; - local->op_ret = -1; - INIT_LIST_HEAD(&local->entries); + local->count = 0; + local->op_ret = -1; + INIT_LIST_HEAD(&local->entries); - if (!trav) - goto err; + if (!trav) + goto err; - STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator, - trav->xlator->fops->readdirp, fd, size, off, xdata); - return 0; + STACK_WIND(frame, stripe_readdirp_cbk, trav->xlator, + trav->xlator->fops->readdirp, fd, size, off, xdata); + return 0; err: - op_errno = (op_errno == -1) ? errno : op_errno; - STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); - - return 0; + op_errno = (op_errno == -1) ? 
errno : op_errno; + STRIPE_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); + return 0; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - if (!this) - goto out; + if (!this) + goto out; - ret = xlator_mem_acct_init (this, gf_stripe_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_stripe_mt_end + 1); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - goto out; - } + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + "failed"); + goto out; + } out: - return ret; + return ret; } static int -clear_pattern_list (stripe_private_t *priv) +clear_pattern_list(stripe_private_t *priv) { - struct stripe_options *prev = NULL; - struct stripe_options *trav = NULL; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("stripe", priv, out); + struct stripe_options *prev = NULL; + struct stripe_options *trav = NULL; + int ret = -1; - trav = priv->pattern; - priv->pattern = NULL; - while (trav) { - prev = trav; - trav = trav->next; - GF_FREE (prev); - } - - ret = 0; - out: - return ret; + GF_VALIDATE_OR_GOTO("stripe", priv, out); + trav = priv->pattern; + priv->pattern = NULL; + while (trav) { + prev = trav; + trav = trav->next; + GF_FREE(prev); + } + ret = 0; +out: + return ret; } - int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { + stripe_private_t *priv = NULL; + data_t *data = NULL; + int ret = -1; + volume_option_t *opt = NULL; - stripe_private_t *priv = NULL; - data_t *data = NULL; - int ret = -1; - volume_option_t *opt = NULL; - - GF_ASSERT (this); - GF_ASSERT (this->private); + GF_ASSERT(this); + GF_ASSERT(this->private); - priv = this->private; + priv = this->private; + ret = 0; + LOCK(&priv->lock); + { + ret = clear_pattern_list(priv); + if (ret) + goto unlock; - ret = 0; - LOCK (&priv->lock); - { - ret = clear_pattern_list (priv); - if (ret) - goto unlock; - - data = dict_get (options, "block-size"); - if (data) { - ret = set_stripe_block_size (this, priv, data->data); - if (ret) - goto unlock; - } else { - opt = xlator_volume_option_get (this, "block-size"); - if (!opt) { - gf_log (this->name, GF_LOG_WARNING, - "option 'block-size' not found"); - ret = -1; - goto unlock; - } - - if (gf_string2bytesize_uint64 (opt->default_value, &priv->block_size)){ - gf_log (this->name, GF_LOG_ERROR, - "Unable to set default block-size "); - ret = -1; - goto unlock; - } - } + data = dict_get(options, "block-size"); + if (data) { + ret = set_stripe_block_size(this, priv, data->data); + if (ret) + goto unlock; + } else { + opt = xlator_volume_option_get(this, "block-size"); + if (!opt) { + gf_log(this->name, GF_LOG_WARNING, + "option 'block-size' not found"); + ret = -1; + goto unlock; + } - GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, - unlock); + if (gf_string2bytesize_uint64(opt->default_value, + &priv->block_size)) { + gf_log(this->name, GF_LOG_ERROR, + "Unable to set default block-size "); + ret = -1; + goto unlock; + } } - unlock: - UNLOCK (&priv->lock); - if (ret) - goto out; - ret = 0; - out: - return ret; + GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, unlock); + } +unlock: + UNLOCK(&priv->lock); + if (ret) + goto out; + ret = 0; +out: + return ret; } /** @@ -5123,129 +4961,126 @@ reconfigure (xlator_t *this, dict_t *options) * @this - */ int32_t -init (xlator_t *this) +init(xlator_t *this) { - stripe_private_t *priv = NULL; - volume_option_t *opt = NULL; - xlator_list_t *trav = NULL; - data_t *data = 
NULL; - int32_t count = 0; - int ret = -1; - - if (!this) - goto out; - - trav = this->children; - while (trav) { - count++; - trav = trav->next; - } + stripe_private_t *priv = NULL; + volume_option_t *opt = NULL; + xlator_list_t *trav = NULL; + data_t *data = NULL; + int32_t count = 0; + int ret = -1; + + if (!this) + goto out; - if (!count) { - gf_log (this->name, GF_LOG_ERROR, - "stripe configured without \"subvolumes\" option. " - "exiting"); - goto out; - } + trav = this->children; + while (trav) { + count++; + trav = trav->next; + } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } + if (!count) { + gf_log(this->name, GF_LOG_ERROR, + "stripe configured without \"subvolumes\" option. " + "exiting"); + goto out; + } - if (count == 1) { - gf_log (this->name, GF_LOG_ERROR, - "stripe configured with only one \"subvolumes\" option." - " please check the volume. exiting"); - goto out; - } + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile "); + } - priv = GF_CALLOC (1, sizeof (stripe_private_t), - gf_stripe_mt_stripe_private_t); + if (count == 1) { + gf_log(this->name, GF_LOG_ERROR, + "stripe configured with only one \"subvolumes\" option." + " please check the volume. exiting"); + goto out; + } - if (!priv) - goto out; - priv->xl_array = GF_CALLOC (count, sizeof (xlator_t *), - gf_stripe_mt_xlator_t); - if (!priv->xl_array) - goto out; + priv = GF_CALLOC(1, sizeof(stripe_private_t), + gf_stripe_mt_stripe_private_t); - priv->last_event = GF_CALLOC (count, sizeof (int), - gf_stripe_mt_int32_t); - if (!priv->last_event) - goto out; + if (!priv) + goto out; + priv->xl_array = GF_CALLOC(count, sizeof(xlator_t *), + gf_stripe_mt_xlator_t); + if (!priv->xl_array) + goto out; - priv->child_count = count; - LOCK_INIT (&priv->lock); + priv->last_event = GF_CALLOC(count, sizeof(int), gf_stripe_mt_int32_t); + if (!priv->last_event) + goto out; - trav = this->children; - count = 0; - while (trav) { - priv->xl_array[count++] = trav->xlator; - trav = trav->next; - } + priv->child_count = count; + LOCK_INIT(&priv->lock); - if (count > 256) { - gf_log (this->name, GF_LOG_ERROR, - "maximum number of stripe subvolumes supported " - "is 256"); - goto out; - } + trav = this->children; + count = 0; + while (trav) { + priv->xl_array[count++] = trav->xlator; + trav = trav->next; + } - ret = 0; - LOCK (&priv->lock); - { - opt = xlator_volume_option_get (this, "block-size"); - if (!opt) { - gf_log (this->name, GF_LOG_WARNING, - "option 'block-size' not found"); - ret = -1; - goto unlock; - } - if (gf_string2bytesize_uint64 (opt->default_value, &priv->block_size)){ - gf_log (this->name, GF_LOG_ERROR, - "Unable to set default block-size "); - ret = -1; - goto unlock; - } - /* option stripe-pattern *avi:1GB,*pdf:16K */ - data = dict_get (this->options, "block-size"); - if (data) { - ret = set_stripe_block_size (this, priv, data->data); - if (ret) - goto unlock; - } - } - unlock: - UNLOCK (&priv->lock); - if (ret) - goto out; + if (count > 256) { + gf_log(this->name, GF_LOG_ERROR, + "maximum number of stripe subvolumes supported " + "is 256"); + goto out; + } + + ret = 0; + LOCK(&priv->lock); + { + opt = xlator_volume_option_get(this, "block-size"); + if (!opt) { + gf_log(this->name, GF_LOG_WARNING, "option 'block-size' not found"); + ret = -1; + goto unlock; + } + if (gf_string2bytesize_uint64(opt->default_value, &priv->block_size)) { + gf_log(this->name, GF_LOG_ERROR, + "Unable to set default block-size "); + ret = -1; + 
goto unlock; + } + /* option stripe-pattern *avi:1GB,*pdf:16K */ + data = dict_get(this->options, "block-size"); + if (data) { + ret = set_stripe_block_size(this, priv, data->data); + if (ret) + goto unlock; + } + } +unlock: + UNLOCK(&priv->lock); + if (ret) + goto out; - GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out); - /* notify related */ - priv->nodes_down = priv->child_count; + GF_OPTION_INIT("use-xattr", priv->xattr_supported, bool, out); + /* notify related */ + priv->nodes_down = priv->child_count; - GF_OPTION_INIT("coalesce", priv->coalesce, bool, out); + GF_OPTION_INIT("coalesce", priv->coalesce, bool, out); - this->local_pool = mem_pool_new (stripe_local_t, 128); - if (!this->local_pool) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); - goto out; - } + this->local_pool = mem_pool_new(stripe_local_t, 128); + if (!this->local_pool) { + ret = -1; + gf_log(this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } - this->private = priv; + this->private = priv; - ret = 0; + ret = 0; out: - if (ret) { - if (priv) { - GF_FREE (priv->xl_array); - GF_FREE (priv); - } + if (ret) { + if (priv) { + GF_FREE(priv->xl_array); + GF_FREE(priv); } - return ret; + } + return ret; } /** @@ -5253,533 +5088,524 @@ out: * @this - */ void -fini (xlator_t *this) +fini(xlator_t *this) { - stripe_private_t *priv = NULL; - struct stripe_options *prev = NULL; - struct stripe_options *trav = NULL; + stripe_private_t *priv = NULL; + struct stripe_options *prev = NULL; + struct stripe_options *trav = NULL; - if (!this) - goto out; + if (!this) + goto out; - priv = this->private; - if (priv) { - this->private = NULL; - GF_FREE (priv->xl_array); - - trav = priv->pattern; - while (trav) { - prev = trav; - trav = trav->next; - GF_FREE (prev); - } - GF_FREE (priv->last_event); - LOCK_DESTROY (&priv->lock); - GF_FREE (priv); + priv = this->private; + if (priv) { + this->private = NULL; + GF_FREE(priv->xl_array); + + trav = priv->pattern; + while (trav) { + prev = trav; + trav = trav->next; + GF_FREE(prev); } + GF_FREE(priv->last_event); + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); + } out: - return; + return; } int32_t -stripe_getxattr_unwind (call_frame_t *frame, - int op_ret, int op_errno, dict_t *dict, dict_t *xdata) +stripe_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, + dict_t *dict, dict_t *xdata) { - STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } int -stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, - dict_t *xdata) +stripe_internal_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) { + char size_key[256] = { + 0, + }; + char index_key[256] = { + 0, + }; + char count_key[256] = { + 0, + }; + char coalesce_key[256] = { + 0, + }; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(frame->local, out); + + if (!xattr || (op_ret == -1)) + goto out; - char size_key[256] = {0,}; - char index_key[256] = {0,}; - char count_key[256] = {0,}; - char coalesce_key[256] = {0,}; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (frame->local, out); - - if (!xattr || (op_ret == -1)) - goto out; - - sprintf (size_key, "trusted.%s.stripe-size", this->name); - sprintf (count_key, "trusted.%s.stripe-count", this->name); - sprintf (index_key, 
"trusted.%s.stripe-index", this->name); - sprintf (coalesce_key, "trusted.%s.stripe-coalesce", this->name); + sprintf(size_key, "trusted.%s.stripe-size", this->name); + sprintf(count_key, "trusted.%s.stripe-count", this->name); + sprintf(index_key, "trusted.%s.stripe-index", this->name); + sprintf(coalesce_key, "trusted.%s.stripe-coalesce", this->name); - dict_del (xattr, size_key); - dict_del (xattr, count_key); - dict_del (xattr, index_key); - dict_del (xattr, coalesce_key); + dict_del(xattr, size_key); + dict_del(xattr, count_key); + dict_del(xattr, index_key); + dict_del(xattr, coalesce_key); out: - STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata); - - return 0; + STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); + return 0; } int -stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +stripe_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { - int call_cnt = 0; - stripe_local_t *local = NULL; + int call_cnt = 0; + stripe_local_t *local = NULL; - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (frame->local, out); + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(frame->local, out); - local = frame->local; + local = frame->local; - LOCK (&frame->lock); - { - call_cnt = --local->wind_count; - } - UNLOCK (&frame->lock); + LOCK(&frame->lock); + { + call_cnt = --local->wind_count; + } + UNLOCK(&frame->lock); - if (!xattr || (op_ret < 0)) - goto out; + if (!xattr || (op_ret < 0)) + goto out; - local->op_ret = 0; + local->op_ret = 0; - if (!local->xattr) { - local->xattr = dict_ref (xattr); - } else { - stripe_aggregate_xattr (local->xattr, xattr); - } + if (!local->xattr) { + local->xattr = dict_ref(xattr); + } else { + stripe_aggregate_xattr(local->xattr, xattr); + } out: - if (!call_cnt) { - STRIPE_STACK_UNWIND (getxattr, frame, - (local ? local->op_ret : -1), - op_errno, - (local ? local->xattr : NULL), xdata); - } + if (!call_cnt) { + STRIPE_STACK_UNWIND(getxattr, frame, (local ? local->op_ret : -1), + op_errno, (local ? 
local->xattr : NULL), xdata); + } - return 0; + return 0; } int32_t -stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +stripe_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - stripe_local_t *local = NULL; - int32_t callcnt = 0; - int32_t ret = -1; - long cky = 0; - void *xattr_val = NULL; - void *xattr_serz = NULL; - stripe_xattr_sort_t *xattr = NULL; - dict_t *stripe_xattr = NULL; - - if (!frame || !frame->local || !this) { - gf_log ("", GF_LOG_ERROR, "Possible NULL deref"); - return ret; - } + stripe_local_t *local = NULL; + int32_t callcnt = 0; + int32_t ret = -1; + long cky = 0; + void *xattr_val = NULL; + void *xattr_serz = NULL; + stripe_xattr_sort_t *xattr = NULL; + dict_t *stripe_xattr = NULL; + + if (!frame || !frame->local || !this) { + gf_log("", GF_LOG_ERROR, "Possible NULL deref"); + return ret; + } - local = frame->local; - cky = (long) cookie; + local = frame->local; + cky = (long)cookie; - if (local->xsel[0] == '\0') { - gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk"); - return ret; - } + if (local->xsel[0] == '\0') { + gf_log(this->name, GF_LOG_ERROR, "Empty xattr in cbk"); + return ret; + } - LOCK (&frame->lock); - { - callcnt = --local->wind_count; + LOCK(&frame->lock); + { + callcnt = --local->wind_count; - if (!dict || (op_ret < 0)) - goto out; + if (!dict || (op_ret < 0)) + goto out; - if (!local->xattr_list) - local->xattr_list = (stripe_xattr_sort_t *) - GF_CALLOC (local->nallocs, - sizeof (stripe_xattr_sort_t), - gf_stripe_mt_xattr_sort_t); + if (!local->xattr_list) + local->xattr_list = (stripe_xattr_sort_t *)GF_CALLOC( + local->nallocs, sizeof(stripe_xattr_sort_t), + gf_stripe_mt_xattr_sort_t); - if (local->xattr_list) { - xattr = local->xattr_list + (int32_t) cky; + if (local->xattr_list) { + xattr = local->xattr_list + (int32_t)cky; - ret = dict_get_ptr_and_len (dict, local->xsel, - &xattr_val, - &xattr->xattr_len); - if (xattr->xattr_len == 0) - goto out; + ret = dict_get_ptr_and_len(dict, local->xsel, &xattr_val, + &xattr->xattr_len); + if (xattr->xattr_len == 0) + goto out; - xattr->pos = cky; - xattr->xattr_value = gf_memdup (xattr_val, - xattr->xattr_len); + xattr->pos = cky; + xattr->xattr_value = gf_memdup(xattr_val, xattr->xattr_len); - if (xattr->xattr_value != NULL) - local->xattr_total_len += xattr->xattr_len + 1; - } + if (xattr->xattr_value != NULL) + local->xattr_total_len += xattr->xattr_len + 1; } - out: - UNLOCK (&frame->lock); - - if (!callcnt) { - if (!local->xattr_total_len) - goto unwind; - - stripe_xattr = dict_new (); - if (!stripe_xattr) - goto unwind; - - /* select filler based on ->xsel */ - if (XATTR_IS_PATHINFO (local->xsel)) - ret = stripe_fill_pathinfo_xattr (this, local, - (char **)&xattr_serz); - else if (XATTR_IS_LOCKINFO (local->xsel)) { - ret = stripe_fill_lockinfo_xattr (this, local, - &xattr_serz); - } else { - gf_log (this->name, GF_LOG_WARNING, - "Unknown xattr in xattr request"); - goto unwind; - } + } +out: + UNLOCK(&frame->lock); - if (!ret) { - ret = dict_set_dynptr (stripe_xattr, local->xsel, - xattr_serz, - local->xattr_total_len); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Can't set %s key in dict", - local->xsel); - } + if (!callcnt) { + if (!local->xattr_total_len) + goto unwind; - unwind: - /* - * Among other things, STRIPE_STACK_UNWIND will free "local" - * for us. That means we can't dereference it afterward. 
- * Fortunately, the actual result is in stripe_xattr now, so we - * can simply clean up before unwinding. - */ - ret = stripe_free_xattr_str (local); - GF_FREE (local->xattr_list); - local->xattr_list = NULL; - - STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, - stripe_xattr, NULL); - - if (stripe_xattr) - dict_unref (stripe_xattr); + stripe_xattr = dict_new(); + if (!stripe_xattr) + goto unwind; + + /* select filler based on ->xsel */ + if (XATTR_IS_PATHINFO(local->xsel)) + ret = stripe_fill_pathinfo_xattr(this, local, (char **)&xattr_serz); + else if (XATTR_IS_LOCKINFO(local->xsel)) { + ret = stripe_fill_lockinfo_xattr(this, local, &xattr_serz); + } else { + gf_log(this->name, GF_LOG_WARNING, + "Unknown xattr in xattr request"); + goto unwind; } - return ret; + if (!ret) { + ret = dict_set_dynptr(stripe_xattr, local->xsel, xattr_serz, + local->xattr_total_len); + if (ret) + gf_log(this->name, GF_LOG_ERROR, "Can't set %s key in dict", + local->xsel); + } + + unwind: + /* + * Among other things, STRIPE_STACK_UNWIND will free "local" + * for us. That means we can't dereference it afterward. + * Fortunately, the actual result is in stripe_xattr now, so we + * can simply clean up before unwinding. + */ + ret = stripe_free_xattr_str(local); + GF_FREE(local->xattr_list); + local->xattr_list = NULL; + + STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, stripe_xattr, + NULL); + + if (stripe_xattr) + dict_unref(stripe_xattr); + } + + return ret; } int -stripe_marker_populate_args (call_frame_t *frame, int type, int *gauge, +stripe_marker_populate_args(call_frame_t *frame, int type, int *gauge, xlator_t **subvols) { - xlator_t *this = frame->this; - stripe_private_t *priv = this->private; - stripe_local_t *local = frame->local; - int count = 0; - - count = priv->child_count; - if (MARKER_XTIME_TYPE == type) { - if (!IA_FILE_OR_DIR (local->loc.inode->ia_type)) - count = 1; - } - memcpy (subvols, priv->xl_array, sizeof (*subvols) * count); - - return count; + xlator_t *this = frame->this; + stripe_private_t *priv = this->private; + stripe_local_t *local = frame->local; + int count = 0; + + count = priv->child_count; + if (MARKER_XTIME_TYPE == type) { + if (!IA_FILE_OR_DIR(local->loc.inode->ia_type)) + count = 1; + } + memcpy(subvols, priv->xl_array, sizeof(*subvols) * count); + + return count; } int32_t -stripe_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +stripe_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - int i = 0; - int ret = 0; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + int i = 0; + int ret = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->path, err); + VALIDATE_OR_GOTO(loc->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + loc_copy(&local->loc, loc); + + if (name && strncmp(name, QUOTA_SIZE_KEY, 
SLEN(QUOTA_SIZE_KEY)) == 0) { + local->wind_count = priv->child_count; - /* Initialization */ - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; + for (i = 0, trav = this->children; i < priv->child_count; + i++, trav = trav->next) { + STACK_WIND(frame, stripe_getxattr_cbk, trav->xlator, + trav->xlator->fops->getxattr, loc, name, xdata); } - local->op_ret = -1; - frame->local = local; - loc_copy (&local->loc, loc); - - if (name && strncmp (name, QUOTA_SIZE_KEY, - SLEN (QUOTA_SIZE_KEY)) == 0) { - local->wind_count = priv->child_count; - - for (i = 0, trav=this->children; i < priv->child_count; i++, - trav = trav->next) { - STACK_WIND (frame, stripe_getxattr_cbk, - trav->xlator, trav->xlator->fops->getxattr, - loc, name, xdata); - } + return 0; + } - return 0; + if (name && (XATTR_IS_PATHINFO(name))) { + if (IA_ISREG(loc->inode->ia_type)) { + ret = inode_ctx_get(loc->inode, this, (uint64_t *)&local->fctx); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "stripe size unavailable from fctx" + " relying on pathinfo could lead to" + " wrong results"); } - if (name && (XATTR_IS_PATHINFO (name))) { - if (IA_ISREG (loc->inode->ia_type)) { - ret = inode_ctx_get (loc->inode, this, - (uint64_t *) &local->fctx); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "stripe size unavailable from fctx" - " relying on pathinfo could lead to" - " wrong results"); - } - - local->nallocs = local->wind_count = priv->child_count; - (void) strncpy (local->xsel, name, strlen (name)); - - /** - * for xattrs that need info from all children, fill ->xsel - * as above and call the filler function in cbk based on - * it - */ - for (i = 0, trav = this->children; i < priv->child_count; i++, - trav = trav->next) { - STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk, - (void *) (long) i, trav->xlator, - trav->xlator->fops->getxattr, - loc, name, xdata); - } + local->nallocs = local->wind_count = priv->child_count; + (void)strncpy(local->xsel, name, strlen(name)); - return 0; + /** + * for xattrs that need info from all children, fill ->xsel + * as above and call the filler function in cbk based on + * it + */ + for (i = 0, trav = this->children; i < priv->child_count; + i++, trav = trav->next) { + STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i, + trav->xlator, trav->xlator->fops->getxattr, loc, + name, xdata); } - if (cluster_handle_marker_getxattr (frame, loc, name, priv->vol_uuid, - stripe_getxattr_unwind, - stripe_marker_populate_args) == 0) - return 0; - - STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; + } + if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid, + stripe_getxattr_unwind, + stripe_marker_populate_args) == 0) return 0; + STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + + return 0; + err: - STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); - return 0; + STRIPE_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL); + return 0; } static gf_boolean_t -stripe_is_special_xattr (const char *name) +stripe_is_special_xattr(const char *name) { - gf_boolean_t is_spl = _gf_false; + gf_boolean_t is_spl = _gf_false; - if (!name) { - goto out; - } + if (!name) { + goto out; + } - if (!strncmp (name, GF_XATTR_LOCKINFO_KEY, - SLEN (GF_XATTR_LOCKINFO_KEY)) - || XATTR_IS_PATHINFO (name)) - is_spl = _gf_true; + if (!strncmp(name, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) || + 
XATTR_IS_PATHINFO(name)) + is_spl = _gf_true; out: - return is_spl; + return is_spl; } int32_t -stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +stripe_fgetxattr_from_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t ret = -1, op_errno = 0; - int i = 0; - xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + int32_t ret = -1, op_errno = 0; + int i = 0; + xlator_list_t *trav = NULL; - priv = this->private; + priv = this->private; - local = mem_get0 (this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } - local->op_ret = -1; - frame->local = local; + local->op_ret = -1; + frame->local = local; - strncpy (local->xsel, name, strlen (name)); - local->nallocs = local->wind_count = priv->child_count; + strncpy(local->xsel, name, strlen(name)); + local->nallocs = local->wind_count = priv->child_count; - for (i = 0, trav = this->children; i < priv->child_count; i++, - trav = trav->next) { - STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk, - (void *) (long) i, trav->xlator, - trav->xlator->fops->fgetxattr, - fd, name, xdata); - } + for (i = 0, trav = this->children; i < priv->child_count; + i++, trav = trav->next) { + STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i, + trav->xlator, trav->xlator->fops->fgetxattr, fd, name, + xdata); + } - return 0; + return 0; err: - STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL); - return ret; + STACK_UNWIND_STRICT(fgetxattr, frame, -1, op_errno, NULL, NULL); + return ret; } int32_t -stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +stripe_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - if (stripe_is_special_xattr (name)) { - stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata); - goto out; - } + if (stripe_is_special_xattr(name)) { + stripe_fgetxattr_from_everyone(frame, this, fd, name, xdata); + goto out; + } - STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); out: - return 0; + return 0; } - - int32_t -stripe_priv_dump (xlator_t *this) +stripe_priv_dump(xlator_t *this) { - char key[GF_DUMP_MAX_BUF_LEN]; - int i = 0; - stripe_private_t *priv = NULL; - int ret = -1; - struct stripe_options *options = NULL; + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; + stripe_private_t *priv = NULL; + int ret = -1; + struct stripe_options *options = NULL; - GF_VALIDATE_OR_GOTO ("stripe", this, out); + GF_VALIDATE_OR_GOTO("stripe", this, out); - priv = this->private; - if (!priv) - goto out; + priv = this->private; + if (!priv) + goto out; - ret = TRY_LOCK (&priv->lock); - if (ret != 0) - goto out; + ret = TRY_LOCK(&priv->lock); + if (ret != 0) + goto out; - gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name); - gf_proc_dump_write("child_count","%d", priv->child_count); + gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name); + gf_proc_dump_write("child_count", "%d", priv->child_count); - for (i = 0; i < priv->child_count; i++) { - sprintf (key, "subvolumes[%d]", i); - gf_proc_dump_write 
(key, "%s.%s", priv->xl_array[i]->type, - priv->xl_array[i]->name); - } + for (i = 0; i < priv->child_count; i++) { + sprintf(key, "subvolumes[%d]", i); + gf_proc_dump_write(key, "%s.%s", priv->xl_array[i]->type, + priv->xl_array[i]->name); + } - options = priv->pattern; - while (options != NULL) { - gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern); - gf_proc_dump_write ("options_block_size", "%ul", options->block_size); + options = priv->pattern; + while (options != NULL) { + gf_proc_dump_write("path_pattern", "%s", priv->pattern->path_pattern); + gf_proc_dump_write("options_block_size", "%ul", options->block_size); - options = options->next; - } + options = options->next; + } - gf_proc_dump_write ("block_size", "%ul", priv->block_size); - gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down); - gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down); - gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported); + gf_proc_dump_write("block_size", "%ul", priv->block_size); + gf_proc_dump_write("nodes-down", "%d", priv->nodes_down); + gf_proc_dump_write("first-child_down", "%d", priv->first_child_down); + gf_proc_dump_write("xattr_supported", "%d", priv->xattr_supported); - UNLOCK (&priv->lock); + UNLOCK(&priv->lock); out: - return ret; + return ret; } struct xlator_fops fops = { - .stat = stripe_stat, - .unlink = stripe_unlink, - .rename = stripe_rename, - .link = stripe_link, - .truncate = stripe_truncate, - .create = stripe_create, - .open = stripe_open, - .readv = stripe_readv, - .writev = stripe_writev, - .statfs = stripe_statfs, - .flush = stripe_flush, - .fsync = stripe_fsync, - .ftruncate = stripe_ftruncate, - .fstat = stripe_fstat, - .mkdir = stripe_mkdir, - .rmdir = stripe_rmdir, - .lk = stripe_lk, - .opendir = stripe_opendir, - .fsyncdir = stripe_fsyncdir, - .setattr = stripe_setattr, - .fsetattr = stripe_fsetattr, - .lookup = stripe_lookup, - .mknod = stripe_mknod, - .setxattr = stripe_setxattr, - .fsetxattr = stripe_fsetxattr, - .getxattr = stripe_getxattr, - .fgetxattr = stripe_fgetxattr, - .removexattr = stripe_removexattr, - .fremovexattr = stripe_fremovexattr, - .readdirp = stripe_readdirp, - .fallocate = stripe_fallocate, - .discard = stripe_discard, - .zerofill = stripe_zerofill, - .seek = stripe_seek, + .stat = stripe_stat, + .unlink = stripe_unlink, + .rename = stripe_rename, + .link = stripe_link, + .truncate = stripe_truncate, + .create = stripe_create, + .open = stripe_open, + .readv = stripe_readv, + .writev = stripe_writev, + .statfs = stripe_statfs, + .flush = stripe_flush, + .fsync = stripe_fsync, + .ftruncate = stripe_ftruncate, + .fstat = stripe_fstat, + .mkdir = stripe_mkdir, + .rmdir = stripe_rmdir, + .lk = stripe_lk, + .opendir = stripe_opendir, + .fsyncdir = stripe_fsyncdir, + .setattr = stripe_setattr, + .fsetattr = stripe_fsetattr, + .lookup = stripe_lookup, + .mknod = stripe_mknod, + .setxattr = stripe_setxattr, + .fsetxattr = stripe_fsetxattr, + .getxattr = stripe_getxattr, + .fgetxattr = stripe_fgetxattr, + .removexattr = stripe_removexattr, + .fremovexattr = stripe_fremovexattr, + .readdirp = stripe_readdirp, + .fallocate = stripe_fallocate, + .discard = stripe_discard, + .zerofill = stripe_zerofill, + .seek = stripe_seek, }; struct xlator_cbks cbks = { - .release = stripe_release, - .forget = stripe_forget, + .release = stripe_release, + .forget = stripe_forget, }; struct xlator_dumpops dumpops = { - .priv = stripe_priv_dump, + .priv = stripe_priv_dump, }; struct volume_options options[] = { - { .key = 
{"block-size"}, - .type = GF_OPTION_TYPE_SIZE_LIST, - .default_value = "128KB", - .min = STRIPE_MIN_BLOCK_SIZE, - .description = "Size of the stripe unit that would be read " - "from or written to the striped servers.", - .op_version = { 1 }, - .tags = { "stripe" }, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - }, - { .key = {"use-xattr"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "true", - .description = "handle the stripe without the xattr", - .tags = { "stripe", "dev-only" }, - .flags = OPT_FLAG_CLIENT_OPT, - }, - { .key = {"coalesce"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "true", - .description = "Enable/Disable coalesce mode to flatten striped " - "files as stored on the server (i.e., eliminate holes " - "caused by the traditional format).", - .op_version = { 1 }, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - .tags = { "stripe" }, - }, - { .key = {NULL} }, + { + .key = {"block-size"}, + .type = GF_OPTION_TYPE_SIZE_LIST, + .default_value = "128KB", + .min = STRIPE_MIN_BLOCK_SIZE, + .description = "Size of the stripe unit that would be read " + "from or written to the striped servers.", + .op_version = {1}, + .tags = {"stripe"}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + }, + { + .key = {"use-xattr"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "handle the stripe without the xattr", + .tags = {"stripe", "dev-only"}, + .flags = OPT_FLAG_CLIENT_OPT, + }, + { + .key = {"coalesce"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "Enable/Disable coalesce mode to flatten striped " + "files as stored on the server (i.e., eliminate holes " + "caused by the traditional format).", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"stripe"}, + }, + {.key = {NULL}}, }; -- cgit
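
The stripe hunks above reformat, over and over, one recurring idiom: fan a fop out to every child subvolume, decrement local->wind_count under frame->lock in each callback, and unwind the aggregated op_ret/op_errno only when the last reply comes back (stripe_setxattr, stripe_fsetxattr_everyone_cbk, stripe_getxattr_cbk and stripe_vgetxattr_cbk all follow it). The standalone sketch below models only that counting pattern, using plain pthreads in place of the glusterfs STACK_WIND/STACK_UNWIND machinery; fanout_local_t, fanout_cbk() and worker() are invented names for this illustration and are not part of the patch or of the xlator API.

/*
 * Minimal model of the stripe "wind_count" fan-out/aggregate pattern.
 * Build with: cc -pthread fanout.c
 */
#include <pthread.h>
#include <stdio.h>

typedef struct {
    pthread_mutex_t lock; /* stands in for frame->lock        */
    int wind_count;       /* outstanding child replies        */
    int op_ret;           /* aggregated result (0 on success) */
    int op_errno;         /* error carried from a failed reply */
} fanout_local_t;

/* Plays the role of one per-child callback: record the reply and,
 * under the lock, detect whether it was the last outstanding one. */
static void
fanout_cbk(fanout_local_t *local, int op_ret, int op_errno)
{
    int call_count;

    pthread_mutex_lock(&local->lock);
    {
        call_count = --local->wind_count;
        if (op_ret < 0) {
            local->op_ret = op_ret;
            local->op_errno = op_errno;
        }
    }
    pthread_mutex_unlock(&local->lock);

    if (call_count == 0) /* last reply: safe to "unwind" now */
        printf("unwind: op_ret=%d op_errno=%d\n", local->op_ret,
               local->op_errno);
}

/* Stand-in for one child subvolume answering the fop wound to it. */
static void *
worker(void *arg)
{
    fanout_cbk(arg, 0, 0); /* every child succeeds in this toy run */
    return NULL;
}

int
main(void)
{
    enum { CHILD_COUNT = 4 }; /* priv->child_count in the real code */
    pthread_t tid[CHILD_COUNT];
    fanout_local_t local = {PTHREAD_MUTEX_INITIALIZER, CHILD_COUNT, 0, 0};
    int i;

    /* "Wind" to every child before any callback can unwind. */
    for (i = 0; i < CHILD_COUNT; i++)
        pthread_create(&tid[i], NULL, worker, &local);
    for (i = 0; i < CHILD_COUNT; i++)
        pthread_join(tid[i], NULL);
    return 0;
}

The detail the real callbacks depend on is the same one shown here: the counter is decremented and read inside the lock, while the unwind itself happens outside it, so exactly one callback observes zero and reports the merged result.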