summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src')
-rw-r--r--xlators/cluster/ec/src/ec-combine.c40
-rw-r--r--xlators/cluster/ec/src/ec-common.c267
-rw-r--r--xlators/cluster/ec/src/ec-common.h53
-rw-r--r--xlators/cluster/ec/src/ec-data.c12
-rw-r--r--xlators/cluster/ec/src/ec-data.h2
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c39
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c112
-rw-r--r--xlators/cluster/ec/src/ec-fops.h144
-rw-r--r--xlators/cluster/ec/src/ec-galois.c3
-rw-r--r--xlators/cluster/ec/src/ec-generic.c108
-rw-r--r--xlators/cluster/ec/src/ec-heal.c363
-rw-r--r--xlators/cluster/ec/src/ec-heald.c157
-rw-r--r--xlators/cluster/ec/src/ec-heald.h9
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c10
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c107
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c199
-rw-r--r--xlators/cluster/ec/src/ec-locks.c109
-rw-r--r--xlators/cluster/ec/src/ec-messages.h3
-rw-r--r--xlators/cluster/ec/src/ec-method.h2
-rw-r--r--xlators/cluster/ec/src/ec-types.h34
-rw-r--r--xlators/cluster/ec/src/ec.c258
-rw-r--r--xlators/cluster/ec/src/ec.h1
22 files changed, 1433 insertions, 599 deletions
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index c5af2ab5e39..703a30e2485 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
"links: %u-%u, uid: %u-%u, gid: %u-%u, "
"rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
", "
- "mode: %o-%o)",
+ "mode: %o-%o), %s",
dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
dst[i].ia_size, src[i].ia_size,
st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type));
+ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
+ ec_msg_str(fop));
return 0;
}
@@ -342,9 +343,8 @@ out:
}
static int32_t
-ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
- char *key, char *new_key, const char *def,
- gf_boolean_t global, ...)
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
{
ec_t *ec = cbk->fop->xl->private;
data_t *data[ec->nodes];
@@ -356,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
ec_dict_list(data, cbk, which, key, global);
- va_start(args, global);
+ va_start(args, fmt);
err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
va_end(args);
@@ -485,22 +485,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
tmp = NULL;
- len = dict_serialized_length(lockinfo);
- if (len < 0) {
- err = len;
-
- goto out;
- }
- ptr = GF_MALLOC(len, gf_common_mt_char);
- if (ptr == NULL) {
- err = -ENOMEM;
-
- goto out;
- }
- err = dict_serialize(lockinfo, ptr);
+ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr,
+ (unsigned int *)&len);
if (err != 0) {
goto out;
}
+
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
err = dict_set_dynptr(dict, key, ptr, len);
if (err != 0) {
@@ -739,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
- return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key,
- NULL, NULL, _gf_false,
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, _gf_false, "(<EC:%s> { })",
data->cbk->fop->xl->name);
}
if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
- return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL,
- NULL, _gf_false);
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
}
if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
@@ -776,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if (XATTR_IS_NODE_UUID(key)) {
if (data->cbk->fop->int32) {
/* List of node uuid is requested */
- return ec_dict_data_concat("{ }", data->cbk, data->which, key,
+ return ec_dict_data_concat(data->cbk, data->which, key,
GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
- _gf_true);
+ _gf_true, "{ }");
} else {
return ec_dict_data_uuid(data->cbk, data->which, key);
}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 8d656702e12..b955efd8c2d 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -44,16 +44,16 @@ ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status)
UNLOCK(&fd->lock);
}
-static int
-ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
+static uintptr_t
+ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask)
{
int i = 0;
int count = 0;
ec_t *ec = NULL;
ec_fd_t *fd_ctx = NULL;
+ uintptr_t need_open = 0;
ec = this->private;
- *need_open = 0;
fd_ctx = ec_fd_get(fd, this);
if (!fd_ctx)
@@ -63,9 +63,9 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
{
for (i = 0; i < ec->nodes; i++) {
if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
- (ec->xl_up & (1 << i))) {
+ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) {
fd_ctx->fd_status[i] = EC_FD_OPENING;
- *need_open |= (1 << i);
+ need_open |= (1 << i);
count++;
}
}
@@ -76,10 +76,11 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
* then ignore fixing the fd as it has been
* requested from heal operation.
*/
- if (count >= ec->fragments)
- count = 0;
+ if (count >= ec->fragments) {
+ need_open = 0;
+ }
- return count;
+ return need_open;
}
static gf_boolean_t
@@ -96,11 +97,11 @@ ec_is_fd_fixable(fd_t *fd)
}
static void
-ec_fix_open(ec_fop_data_t *fop)
+ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
{
- int call_count = 0;
uintptr_t need_open = 0;
int ret = 0;
+ int32_t flags = 0;
loc_t loc = {
0,
};
@@ -109,9 +110,10 @@ ec_fix_open(ec_fop_data_t *fop)
goto out;
/* Evaluate how many remote fd's to be opened */
- call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open);
- if (!call_count)
+ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask);
+ if (need_open == 0) {
goto out;
+ }
loc.inode = inode_ref(fop->fd->inode);
gf_uuid_copy(loc.gfid, fop->fd->inode->gfid);
@@ -120,12 +122,15 @@ ec_fix_open(ec_fop_data_t *fop)
goto out;
}
+ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL));
if (IA_IFDIR == fop->fd->inode->ia_type) {
- ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
+ ec_opendir(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
&fop->loc[0], fop->fd, NULL);
} else {
- ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
- &loc, fop->fd->flags, fop->fd, NULL);
+ ec_open(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
+ flags, fop->fd, NULL);
}
out:
@@ -225,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
int32_t
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
- uintptr_t bad, dict_t *xdata)
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
@@ -311,16 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -494,12 +502,16 @@ ec_resume(ec_fop_data_t *fop, int32_t error)
}
void
-ec_resume_parent(ec_fop_data_t *fop, int32_t error)
+ec_resume_parent(ec_fop_data_t *fop)
{
ec_fop_data_t *parent;
+ int32_t error = 0;
parent = fop->parent;
if (parent != NULL) {
+ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) {
+ error = fop->error;
+ }
ec_trace("RESUME_PARENT", fop, "error=%u", error);
fop->parent = NULL;
ec_resume(parent, error);
@@ -592,6 +604,8 @@ ec_internal_op(ec_fop_data_t *fop)
return _gf_true;
if (fop->id == GF_FOP_FXATTROP)
return _gf_true;
+ if (fop->id == GF_FOP_OPEN)
+ return _gf_true;
return _gf_false;
}
@@ -602,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -613,24 +627,46 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
-int32_t
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
+static int32_t
ec_child_select(ec_fop_data_t *fop)
{
ec_t *ec = fop->xl->private;
@@ -644,6 +680,9 @@ ec_child_select(ec_fop_data_t *fop)
* unlock should go on all subvols where lock is performed*/
if (fop->parent && !ec_internal_op(fop)) {
fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= fop->parent->healing;
+ }
}
if ((fop->mask & ~ec->xl_up) != 0) {
@@ -684,15 +723,18 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
- ec_sleep(fop);
+ if (!fop->parent && fop->lock_count &&
+ (fop->locks[0].update[EC_DATA_TXN] ||
+ fop->locks[0].update[EC_METADATA_TXN])) {
+ if (ec->quorum_count && (num < ec->quorum_count)) {
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
+ return 0;
+ }
+ }
return 1;
}
@@ -772,6 +814,8 @@ ec_dispatch_one(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = 1;
fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
@@ -806,6 +850,8 @@ ec_dispatch_inc(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
@@ -819,6 +865,8 @@ ec_dispatch_all(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
@@ -837,6 +885,8 @@ ec_dispatch_min(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = count = ec->fragments;
fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
idx = fop->first - 1;
@@ -851,6 +901,23 @@ ec_dispatch_min(ec_fop_data_t *fop)
}
}
+void
+ec_succeed_all(ec_fop_data_t *fop)
+{
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ /* Simulate a successful execution on all bricks */
+ ec_trace("SUCCEED", fop, "");
+
+ fop->good = fop->remaining;
+ fop->remaining = 0;
+ }
+}
+
ec_lock_t *
ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
{
@@ -1372,27 +1439,28 @@ ec_get_size_version(ec_lock_link_t *link)
!ec_is_data_fop(fop->id))
link->optimistic_changelog = _gf_true;
+ memset(&loc, 0, sizeof(loc));
+
+ LOCK(&lock->loc.inode->lock);
+
set_dirty = ec_set_dirty_flag(link, ctx, dirty);
/* If ec metadata has already been retrieved, do not try again. */
- if (ctx->have_info && (!set_dirty)) {
+ if (ctx->have_info) {
if (ec_is_data_fop(fop->id)) {
fop->healing |= lock->healing;
}
- return;
+ if (!set_dirty)
+ goto unlock;
}
/* Determine if there's something we need to retrieve for the current
* operation. */
if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
(lock->loc.inode->ia_type != IA_INVAL)) {
- return;
+ goto unlock;
}
- memset(&loc, 0, sizeof(loc));
-
- LOCK(&lock->loc.inode->lock);
-
changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
if (link->waiting_flags) {
/* This fop needs to wait until all its flags are cleared which
@@ -1403,6 +1471,7 @@ ec_get_size_version(ec_lock_link_t *link)
GF_ASSERT(!changed_flags);
}
+unlock:
UNLOCK(&lock->loc.inode->lock);
if (!changed_flags)
@@ -1814,6 +1883,10 @@ ec_lock_acquired(ec_lock_link_t *link)
LOCK(&lock->loc.inode->lock);
lock->acquired = _gf_true;
+ if (lock->contention) {
+ lock->release = _gf_true;
+ lock->contention = _gf_false;
+ }
ec_lock_update_fd(lock, fop);
ec_lock_wake_shared(lock, &list);
@@ -1824,7 +1897,8 @@ ec_lock_acquired(ec_lock_link_t *link)
if (fop->use_fd &&
(link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
- ec_fix_open(fop);
+ /* Try to reopen closed fd's only if lock has succeeded. */
+ ec_fix_open(fop, lock->mask);
}
ec_lock_resume_shared(&list);
@@ -1838,15 +1912,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ec_lock_link_t *link = NULL;
ec_lock_t *lock = NULL;
+ link = fop->data;
+ lock = link->lock;
if (op_ret >= 0) {
- link = fop->data;
- lock = link->lock;
lock->mask = lock->good_mask = fop->good;
lock->healing = 0;
ec_lock_acquired(link);
ec_lock(fop->parent);
} else {
+ LOCK(&lock->loc.inode->lock);
+ {
+ lock->contention = _gf_false;
+ }
+ UNLOCK(&lock->loc.inode->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
"Failed to complete preop lock");
}
@@ -2177,7 +2256,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id));
+ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
} else {
ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
}
@@ -2214,6 +2293,23 @@ ec_unlock_lock(ec_lock_link_t *link)
}
}
+void
+ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __ec_inode_get(inode, xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
+ ctx->bad_version++;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+}
+
int32_t
ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xattr,
@@ -2229,6 +2325,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
ctx = lock->ctx;
if (op_ret < 0) {
+ if (link->lock->fd == NULL) {
+ ec_inode_bad_inc(link->lock->loc.inode, this);
+ } else {
+ ec_inode_bad_inc(link->lock->fd->inode, this);
+ }
+
gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
EC_MSG_SIZE_VERS_UPDATE_FAIL,
"Failed to update version and size. %s", ec_msg_str(fop));
@@ -2371,37 +2473,47 @@ ec_update_info(ec_lock_link_t *link)
uint64_t dirty[2] = {0, 0};
uint64_t size;
ec_t *ec = NULL;
+ uintptr_t mask;
lock = link->lock;
ctx = lock->ctx;
ec = link->fop->xl->private;
/* pre_version[*] will be 0 if have_version is false */
- version[0] = ctx->post_version[0] - ctx->pre_version[0];
- version[1] = ctx->post_version[1] - ctx->pre_version[1];
+ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] -
+ ctx->pre_version[EC_DATA_TXN];
+ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] -
+ ctx->pre_version[EC_METADATA_TXN];
size = ctx->post_size - ctx->pre_size;
/* If we set the dirty flag for update fop, we have to unset it.
* If fop has failed on some bricks, leave the dirty as marked. */
+
if (lock->unlock_now) {
+ if (version[EC_DATA_TXN]) {
+ /*A data fop will have difference in post and pre version
+ *and for data fop we send writes on healing bricks also */
+ mask = lock->good_mask | lock->healing;
+ } else {
+ mask = lock->good_mask;
+ }
/* Ensure that nodes are up while doing final
* metadata update.*/
- if (!(ec->node_mask & ~lock->good_mask) &&
- !(ec->node_mask & ~ec->xl_up)) {
- if (ctx->dirty[0] != 0) {
- dirty[0] = -1;
+ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) {
+ if (ctx->dirty[EC_DATA_TXN] != 0) {
+ dirty[EC_DATA_TXN] = -1;
}
- if (ctx->dirty[1] != 0) {
- dirty[1] = -1;
+ if (ctx->dirty[EC_METADATA_TXN] != 0) {
+ dirty[EC_METADATA_TXN] = -1;
}
/*If everything is fine and we already
*have version xattr set on entry, there
*is no need to update version again*/
- if (ctx->pre_version[0]) {
- version[0] = 0;
+ if (ctx->pre_version[EC_DATA_TXN]) {
+ version[EC_DATA_TXN] = 0;
}
- if (ctx->pre_version[1]) {
- version[1] = 0;
+ if (ctx->pre_version[EC_METADATA_TXN]) {
+ version[EC_METADATA_TXN] = 0;
}
} else {
link->optimistic_changelog = _gf_false;
@@ -2410,8 +2522,8 @@ ec_update_info(ec_lock_link_t *link)
memset(ctx->dirty, 0, sizeof(ctx->dirty));
}
- if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) ||
- (dirty[1] != 0)) {
+ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) ||
+ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) {
ec_update_size_version(link, version, size, dirty);
return _gf_true;
}
@@ -2453,13 +2565,20 @@ ec_lock_release(ec_t *ec, inode_t *inode)
goto done;
}
lock = ctx->inode_lock;
- if ((lock == NULL) || !lock->acquired || lock->release) {
+ if ((lock == NULL) || lock->release) {
goto done;
}
gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
inode);
+ if (!lock->acquired) {
+ /* This happens if some bricks already got the lock while inodelk is in
+ * progress. Set release to true after lock is acquired*/
+ lock->contention = _gf_true;
+ goto done;
+ }
+
/* The lock is not marked to be released, so the frozen list should be
* empty. */
GF_ASSERT(list_empty(&lock->frozen));
@@ -2911,3 +3030,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error)
__ec_manager(fop, error);
}
+
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec)
+{
+ if ((list_empty(&ec->pending_fops)) &&
+ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 115e1475b50..51493612ac6 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -11,8 +11,7 @@
#ifndef __EC_COMMON_H__
#define __EC_COMMON_H__
-#include <glusterfs/xlator.h>
-
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#include "ec-data.h"
typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
@@ -26,6 +25,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
#define EC_FLAG_LOCK_SHARED 0x0001
+#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \
+ do { \
+ ec_t *__ec = fop->xl->private; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ int32_t __success_count = gf_bits_count(fop->good); \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (!fop->parent && frame && \
+ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \
+ __ec->quorum_count && (__success_count < __ec->quorum_count) && \
+ op_ret >= 0) { \
+ __op_ret = -1; \
+ __op_errno = EIO; \
+ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \
+ EC_MSG_CHILDS_INSUFFICIENT, \
+ "Insufficient available children for this request " \
+ "(have %d, need %d). %s", \
+ __success_count, __ec->quorum_count, ec_msg_str(fop)); \
+ } \
+ fn(frame, cookie, this, __op_ret, __op_errno, params); \
+ } while (0)
+
enum _ec_xattrop_flags {
EC_FLAG_XATTROP,
EC_FLAG_DATA_DIRTY,
@@ -54,9 +77,12 @@ enum _ec_xattrop_flags {
#define EC_SELFHEAL_BIT 62
-#define EC_MINIMUM_ONE -1
-#define EC_MINIMUM_MIN -2
-#define EC_MINIMUM_ALL -3
+#define EC_MINIMUM_ONE (1 << 6)
+#define EC_MINIMUM_MIN (2 << 6)
+#define EC_MINIMUM_ALL (3 << 6)
+#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
+#define EC_FOP_MINIMUM(_flags) ((_flags)&255)
+#define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
#define EC_UPDATE_DATA 1
#define EC_UPDATE_META 2
@@ -163,11 +189,14 @@ void
ec_dispatch_one(ec_fop_data_t *fop);
void
+ec_succeed_all(ec_fop_data_t *fop);
+
+void
ec_sleep(ec_fop_data_t *fop);
void
ec_resume(ec_fop_data_t *fop, int32_t error);
void
-ec_resume_parent(ec_fop_data_t *fop, int32_t error);
+ec_resume_parent(ec_fop_data_t *fop);
void
ec_manager(ec_fop_data_t *fop, int32_t error);
@@ -190,4 +219,16 @@ ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
void
ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index,
int32_t ret_status);
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop);
+void
+ec_set_entry_healing(ec_fop_data_t *fop);
+void
+ec_reset_entry_healing(ec_fop_data_t *fop);
+char *
+ec_msg_str(ec_fop_data_t *fop);
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec);
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index fae8843a679..06388833546 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "ec-mem-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-data.h"
@@ -98,7 +97,7 @@ ec_cbk_data_destroy(ec_cbk_data_t *cbk)
ec_fop_data_t *
ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
- uint32_t flags, uintptr_t target, int32_t minimum,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
void *data)
{
@@ -151,7 +150,8 @@ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
fop->refs = 1;
fop->flags = flags;
- fop->minimum = minimum;
+ fop->minimum = EC_FOP_MINIMUM(fop_flags);
+ fop->fop_flags = EC_FOP_FLAGS(fop_flags);
fop->mask = target;
fop->wind = wind;
@@ -201,11 +201,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
{
ec_t *ec = fop->xl->private;
+ *notify = _gf_false;
+
if (!list_empty(&fop->pending_list)) {
LOCK(&ec->lock);
{
list_del_init(&fop->pending_list);
- *notify = list_empty(&ec->pending_fops);
+ *notify = __ec_is_last_fop(ec);
}
UNLOCK(&ec->lock);
}
@@ -271,7 +273,7 @@ ec_fop_data_release(ec_fop_data_t *fop)
loc_wipe(&fop->loc[1]);
GF_FREE(fop->errstr);
- ec_resume_parent(fop, fop->error);
+ ec_resume_parent(fop);
ec_fop_cleanup(fop);
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 112536d554c..c8a74ffe1ed 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -18,7 +18,7 @@ ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno);
ec_fop_data_t *
ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
- uint32_t flags, uintptr_t target, int32_t minimum,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
void *data);
void
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index c9db7010a0f..f71dcfac293 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -8,15 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
/****************************************************************
@@ -127,13 +123,15 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
- err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
- if (err != 0) {
- UNLOCK(&fop->fd->lock);
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
- fop->error = -err;
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
UNLOCK(&fop->fd->lock);
@@ -219,7 +217,7 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
void
ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.opendir = func};
@@ -233,7 +231,7 @@ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_opendir,
+ target, fop_flags, ec_wind_opendir,
ec_manager_opendir, callback, data);
if (fop == NULL) {
goto out;
@@ -388,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
/* Return error if opendir has not been successfully called on
* any subvolume. */
ctx = ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) || (ctx->open == 0)) {
- fop->error = EINVAL;
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
return EC_STATE_REPORT;
}
@@ -515,7 +520,7 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
void
ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.readdir = func};
@@ -529,7 +534,7 @@ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_readdir,
+ target, fop_flags, ec_wind_readdir,
ec_manager_readdir, callback, data);
if (fop == NULL) {
goto out;
@@ -585,7 +590,7 @@ ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.readdirp = func};
@@ -599,7 +604,7 @@ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_readdirp, ec_manager_readdir, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index e24667feedc..53d27d895c3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -8,9 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -218,10 +215,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.create != NULL) {
- fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->fd, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->fd,
+ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
+ &cbk->iatt[2], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -262,7 +259,7 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
void
ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.create = func};
@@ -275,7 +272,7 @@ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags,
ec_wind_create, ec_manager_create, callback,
data);
if (fop == NULL) {
@@ -390,9 +387,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.link != NULL) {
- fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -432,9 +430,9 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
}
void
-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
ec_cbk_t callback = {.link = func};
ec_fop_data_t *fop = NULL;
@@ -446,7 +444,7 @@ ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags,
ec_wind_link, ec_manager_link, callback, data);
if (fop == NULL) {
goto out;
@@ -569,9 +567,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.mkdir != NULL) {
- fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -613,9 +612,9 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
}
void
-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.mkdir = func};
ec_fop_data_t *fop = NULL;
@@ -627,7 +626,7 @@ ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags,
ec_wind_mkdir, ec_manager_mkdir, callback, data);
if (fop == NULL) {
goto out;
@@ -773,9 +772,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.mknod != NULL) {
- fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -815,9 +815,9 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
}
void
-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata)
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.mknod = func};
ec_fop_data_t *fop = NULL;
@@ -829,7 +829,7 @@ ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags,
ec_wind_mknod, ec_manager_mknod, callback, data);
if (fop == NULL) {
goto out;
@@ -931,10 +931,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.rename != NULL) {
- fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
+ &cbk->iatt[4], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -975,7 +975,7 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
void
ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
loc_t *newloc, dict_t *xdata)
{
ec_cbk_t callback = {.rename = func};
@@ -988,7 +988,7 @@ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags,
ec_wind_rename, ec_manager_rename, callback,
data);
if (fop == NULL) {
@@ -1083,9 +1083,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.rmdir != NULL) {
- fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1125,9 +1125,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
}
void
-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
- dict_t *xdata)
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
{
ec_cbk_t callback = {.rmdir = func};
ec_fop_data_t *fop = NULL;
@@ -1139,7 +1139,7 @@ ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags,
ec_wind_rmdir, ec_manager_rmdir, callback, data);
if (fop == NULL) {
goto out;
@@ -1237,10 +1237,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.symlink != NULL) {
- fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1281,7 +1281,7 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
void
ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void *data,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.symlink = func};
@@ -1294,9 +1294,9 @@ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, minimum,
- ec_wind_symlink, ec_manager_symlink, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target,
+ fop_flags, ec_wind_symlink, ec_manager_symlink,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1392,9 +1392,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.unlink != NULL) {
- fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1435,7 +1435,7 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
void
ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
int xflags, dict_t *xdata)
{
ec_cbk_t callback = {.unlink = func};
@@ -1448,7 +1448,7 @@ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags,
ec_wind_unlink, ec_manager_unlink, callback,
data);
if (fop == NULL) {
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index 2abef0d17b3..07edf8a7fec 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -18,233 +18,237 @@
void
ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
int32_t mask, dict_t *xdata);
void
ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
void
ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
const char *volume, loc_t *loc, const char *basename,
entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
void
ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata);
void
-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
void
-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
- dict_t *xdata);
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
void
ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
int32_t datasync, dict_t *xdata);
void
ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
const char *name, dict_t *xdata);
void
ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
const char *name, dict_t *xdata);
void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
- dict_t *xdata);
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata);
void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
- dict_t *xdata);
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata);
void
ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
void *data, const char *volume, loc_t *loc, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
void *data, const char *volume, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata);
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
void
-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
dict_t *xdata);
void
-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata);
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
void
-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata);
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
void
-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
- dict_t *xdata);
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
void
ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
fd_t *fd, dict_t *xdata);
void
ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata);
void
ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata);
void
ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
size_t size, dict_t *xdata);
void
-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
- uint32_t flags, dict_t *xdata);
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata);
void
ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
loc_t *loc, const char *name, dict_t *xdata);
void
ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
fd_t *fd, const char *name, dict_t *xdata);
void
ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
loc_t *newloc, dict_t *xdata);
void
-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
- dict_t *xdata);
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
void
ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata);
void
ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata);
void
ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
dict_t *dict, int32_t flags, dict_t *xdata);
void
ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
dict_t *dict, int32_t flags, dict_t *xdata);
void
-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata);
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
void
-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
void
ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
dict_t *xdata);
void
ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void *data,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
void
ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata);
void
ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
off_t offset, size_t len, dict_t *xdata);
void
ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
off_t offset, dict_t *xdata);
void
ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
off_t offset, dict_t *xdata);
void
ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
int xflags, dict_t *xdata);
void
ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata);
void
ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
void
ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
void
-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata);
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
void
-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata);
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata);
#endif /* __EC_FOPS_H__ */
diff --git a/xlators/cluster/ec/src/ec-galois.c b/xlators/cluster/ec/src/ec-galois.c
index ee7662f52ce..6e4990c71f5 100644
--- a/xlators/cluster/ec/src/ec-galois.c
+++ b/xlators/cluster/ec/src/ec-galois.c
@@ -10,9 +10,6 @@
#include <string.h>
-#include <glusterfs/mem-pool.h>
-#include <glusterfs/list.h>
-
#include "ec-mem-types.h"
#include "ec-gf8.h"
#include "ec-helpers.h"
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index 175e88ac94b..884deb93669 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -8,8 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
#include <glusterfs/byte-order.h>
#include "ec.h"
@@ -17,7 +15,6 @@
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
/* FOP: flush */
@@ -150,9 +147,41 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state)
}
}
+static int32_t
+ec_validate_fd(fd_t *fd, xlator_t *xl)
+{
+ uint64_t iversion = 0;
+ uint64_t fversion = 0;
+ ec_inode_t *inode_ctx = NULL;
+ ec_fd_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ fversion = fd_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ LOCK(&fd->inode->lock);
+ {
+ inode_ctx = __ec_inode_get(fd->inode, xl);
+ if (inode_ctx) {
+ iversion = inode_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->inode->lock);
+ if (fversion < iversion) {
+ return EBADF;
+ }
+ return 0;
+}
+
void
-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
ec_cbk_t callback = {.flush = func};
ec_fop_data_t *fop = NULL;
@@ -164,7 +193,17 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum,
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
ec_wind_flush, ec_manager_flush, callback, data);
if (fop == NULL) {
goto out;
@@ -366,9 +405,9 @@ ec_manager_fsync(ec_fop_data_t *fop, int32_t state)
}
void
-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
- dict_t *xdata)
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
ec_cbk_t callback = {.fsync = func};
ec_fop_data_t *fop = NULL;
@@ -380,7 +419,17 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum,
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
ec_wind_fsync, ec_manager_fsync, callback, data);
if (fop == NULL) {
goto out;
@@ -553,7 +602,7 @@ ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state)
void
ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
int32_t datasync, dict_t *xdata)
{
ec_cbk_t callback = {.fsyncdir = func};
@@ -566,9 +615,9 @@ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, minimum,
- ec_wind_fsyncdir, ec_manager_fsyncdir, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target,
+ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -848,7 +897,7 @@ ec_manager_lookup(ec_fop_data_t *fop, int32_t state)
void
ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
dict_t *xdata)
{
ec_cbk_t callback = {.lookup = func};
@@ -862,7 +911,7 @@ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_lookup,
+ target, fop_flags, ec_wind_lookup,
ec_manager_lookup, callback, data);
if (fop == NULL) {
goto out;
@@ -1033,7 +1082,7 @@ ec_manager_statfs(ec_fop_data_t *fop, int32_t state)
void
ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
dict_t *xdata)
{
ec_cbk_t callback = {.statfs = func};
@@ -1047,7 +1096,7 @@ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_statfs,
+ target, fop_flags, ec_wind_statfs,
ec_manager_statfs, callback, data);
if (fop == NULL) {
goto out;
@@ -1270,7 +1319,7 @@ ec_manager_xattrop(ec_fop_data_t *fop, int32_t state)
void
ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
ec_cbk_t callback = {.xattrop = func};
@@ -1283,9 +1332,9 @@ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, minimum,
- ec_wind_xattrop, ec_manager_xattrop, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target,
+ fop_flags, ec_wind_xattrop, ec_manager_xattrop,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1343,7 +1392,7 @@ ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
ec_cbk_t callback = {.fxattrop = func};
@@ -1356,9 +1405,9 @@ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, minimum,
- ec_wind_fxattrop, ec_manager_xattrop, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target,
+ fop_flags, ec_wind_fxattrop, ec_manager_xattrop,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1507,8 +1556,9 @@ ec_manager_ipc(ec_fop_data_t *fop, int32_t state)
}
void
-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata)
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata)
{
ec_cbk_t callback = {.ipc = func};
ec_fop_data_t *fop = NULL;
@@ -1520,7 +1570,7 @@ ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags,
ec_wind_ipc, ec_manager_ipc, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index eaf80e023e3..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
#include <glusterfs/defaults.h>
#include <glusterfs/compat-errno.h>
#include <glusterfs/byte-order.h>
@@ -17,7 +16,6 @@
#include <glusterfs/cluster-syncop.h>
#include "ec.h"
-#include "ec-mem-types.h"
#include "ec-types.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -72,6 +70,7 @@ struct ec_name_data {
char *name;
inode_t *parent;
default_args_cbk_t *replies;
+ uint32_t heal_pending;
};
static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -103,6 +102,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
}
/* FOP: heal */
+void
+ec_set_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += 1;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+}
+
+void
+ec_reset_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+ int32_t heal_count = 0;
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += -1;
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+}
+
uintptr_t
ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
{
@@ -325,16 +366,16 @@ ec_heal_data_block(ec_heal_t *heal)
/* FOP: fheal */
void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
- dict_t *xdata)
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata)
{
ec_fd_t *ctx = ec_fd_get(fd, this);
if (ctx != NULL) {
gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
ctx->open);
- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
+ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
xdata);
}
}
@@ -954,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
ret = -ENOTCONN;
goto out;
}
+
out:
if (xattr)
dict_unref(xattr);
@@ -977,6 +1019,7 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
int estale_count = 0;
int i = 0;
call_frame_t *frame = name_data->frame;
+ uuid_t gfid;
ec = name_data->frame->this->private;
EC_REPLIES_ALLOC(replies, ec->nodes);
@@ -985,12 +1028,16 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
goto out;
}
+ loc.parent = inode_ref(name_data->parent);
loc.inode = inode_new(name_data->parent->table);
if (!loc.inode) {
ret = -ENOMEM;
goto out;
}
- gf_uuid_parse(key, loc.gfid);
+
+ gf_uuid_parse(key, gfid);
+ gf_uuid_copy(loc.pargfid, name_data->parent->gfid);
+ loc.name = name_data->name;
output = alloca0(ec->nodes);
ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes,
replies, output, name_data->frame, ec->xl, &loc, NULL);
@@ -1003,6 +1050,11 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
estale_count++;
else
name_data->participants[i] = 0;
+ } else if (gf_uuid_compare(gfid, replies[i].stat.ia_gfid)) {
+ estale_count++;
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: different gfid as %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ key);
}
}
@@ -1122,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
dict_t *xdata = NULL;
char *linkname = NULL;
ec_config_t config;
+
/* There should be just one gfid key */
EC_REPLIES_ALLOC(replies, ec->nodes);
if (gfid_db->count != 1) {
@@ -1366,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -1443,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
name_on);
- if (ret < 0)
+ if (ret < 0) {
memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
for (i = 0; i < ec->nodes; i++)
if (name_data->participants[i] && !name_on[i])
name_data->failed_on[i] = 1;
+
return 0;
}
int
ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *participants)
+ unsigned char *participants, uint32_t *pending)
{
int i = 0;
int j = 0;
@@ -1467,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
name_data.frame = frame;
name_data.participants = participants;
name_data.failed_on = alloca0(ec->nodes);
- ;
+ name_data.heal_pending = 0;
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
@@ -1486,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
break;
}
}
+ *pending += name_data.heal_pending;
+
loc_wipe(&loc);
return ret;
}
@@ -1493,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
int
__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *heal_on, unsigned char *sources,
- unsigned char *healed_sinks)
+ unsigned char *healed_sinks, uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *output = NULL;
@@ -1538,7 +1602,7 @@ unlock:
if (sources[i] || healed_sinks[i])
participants[i] = 1;
}
- ret = ec_heal_names(frame, ec, inode, participants);
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
goto out;
@@ -1559,7 +1623,8 @@ out:
int
ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *sources, unsigned char *healed_sinks)
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -1590,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
goto unlock;
}
ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
- healed_sinks);
+ healed_sinks, pending);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1909,16 +1974,16 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
case EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
- NULL);
+ 0, NULL);
}
return EC_STATE_END;
case -EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
- 0, 0, NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+ fop->error, 0, 0, 0, 0, NULL);
}
return EC_STATE_END;
@@ -1933,7 +1998,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
/*Takes lock */
void
ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal)
+ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -1944,7 +2009,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, ec_manager_heal_block, callback, heal);
if (fop == NULL)
goto out;
@@ -1955,19 +2020,21 @@ out:
if (fop != NULL) {
ec_manager(fop, error);
} else {
- func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
}
int32_t
ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- ec_heal_t *heal = fop->data;
+ ec_heal_t *heal = cookie;
- fop->heal = NULL;
+ if (heal->fop) {
+ heal->fop->heal = NULL;
+ }
heal->fop = NULL;
heal->error = op_ret < 0 ? op_errno : 0;
syncbarrier_wake(heal->data);
@@ -2259,9 +2326,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
loc.inode = inode_ref(fd->inode);
gf_uuid_copy(loc.gfid, fd->inode->gfid);
- ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
- output, frame, ec->xl, &loc, &source_buf,
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
+ ret = cluster_setattr(
+ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
+ ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
ret = -ENOTCONN;
@@ -2429,6 +2497,58 @@ out:
return ret;
}
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
void
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
{
@@ -2446,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t mbad = 0;
intptr_t good = 0;
intptr_t bad = 0;
+ uint32_t pending = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2481,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
if (loc->name && strlen(loc->name)) {
ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
participants);
- if (ret == 0) {
+ if (ret >= 0) {
gf_msg_debug(this->name, 0,
"%s: name heal "
"successful on %" PRIXPTR,
@@ -2499,32 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
/* Mount triggers heal only when it detects that it must need heal, shd
* triggers heals periodically which need not be thorough*/
- ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
- !ec->shd.iamshd, &need_heal);
-
- if (need_heal == EC_HEAL_NONEED) {
- gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
- "Heal is not required for : %s ", uuid_utoa(loc->gfid));
- goto out;
+ if (ec->shd.iamshd && (ret <= 0)) {
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+ }
}
- msources = alloca0(ec->nodes);
- mhealed_sinks = alloca0(ec->nodes);
- ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
- if (ret == 0) {
- mgood = ec_char_array_to_mask(msources, ec->nodes);
- mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
- } else {
- op_ret = -1;
- op_errno = -ret;
- }
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
if (IA_ISREG(loc->inode->ia_type)) {
ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
healed_sinks);
} else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
} else {
ret = 0;
memcpy(sources, participants, ec->nodes);
@@ -2538,15 +2661,27 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
op_ret = -1;
op_errno = -ret;
}
+ msources = alloca0(ec->nodes);
+ mhealed_sinks = alloca0(ec->nodes);
+ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+ if (ret == 0) {
+ mgood = ec_char_array_to_mask(msources, ec->nodes);
+ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
out:
+ ec_reset_entry_healing(fop);
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
- mgood & good, mbad & bad, NULL);
+ mgood & good, mbad & bad, pending, NULL);
}
if (frame)
STACK_DESTROY(frame->root);
+
return;
}
@@ -2593,8 +2728,8 @@ void
ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
{
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
- NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+ 0, 0, NULL);
}
ec_fop_data_release(fop);
}
@@ -2603,13 +2738,31 @@ void
ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
{
int ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = create_frame(ec->xl, ec->xl->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
- NULL, fop);
+ frame, fop);
+out:
if (ret < 0) {
ec_fop_set_error(fop, ENOMEM);
ec_heal_fail(ec, fop);
}
+
+ if (frame)
+ STACK_DESTROY(frame->root);
}
void
@@ -2650,11 +2803,33 @@ ec_handle_healers_done(ec_fop_data_t *fop)
ec_launch_heal(ec, heal_fop);
}
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ int32_t heal_count = 0;
+ loc_t *loc = NULL;
+
+ loc = &fop->loc[0];
+
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+ return heal_count;
+}
+
void
ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
{
gf_boolean_t can_heal = _gf_true;
ec_t *ec = this->private;
+ ec_fop_data_t *fop_rel = NULL;
if (fop->req_frame == NULL) {
LOCK(&ec->lock);
@@ -2662,8 +2837,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
if ((ec->background_heals > 0) &&
(ec->heal_wait_qlen + ec->background_heals) >
(ec->heal_waiters + ec->healers)) {
- list_add_tail(&fop->healer, &ec->heal_waiting);
- ec->heal_waiters++;
+ if (!ec_is_entry_healing(fop)) {
+ list_add_tail(&fop->healer, &ec->heal_waiting);
+ ec->heal_waiters++;
+ ec_set_entry_healing(fop);
+ } else {
+ fop_rel = fop;
+ }
fop = __ec_dequeue_heals(ec);
} else {
can_heal = _gf_false;
@@ -2673,8 +2853,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
}
if (can_heal) {
- if (fop)
+ if (fop) {
+ if (fop->req_frame != NULL) {
+ ec_set_entry_healing(fop);
+ }
ec_launch_heal(ec, fop);
+ }
} else {
gf_msg_debug(this->name, 0,
"Max number of heals are "
@@ -2682,12 +2866,15 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
ec_fop_set_error(fop, EBUSY);
ec_heal_fail(ec, fop);
}
+ if (fop_rel) {
+ ec_heal_done(0, NULL, fop_rel);
+ }
}
void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
- dict_t *xdata)
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -2703,7 +2890,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
if (frame && frame->local)
goto fail;
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, NULL, callback, data);
err = ENOMEM;
@@ -2729,15 +2916,27 @@ fail:
if (fop)
ec_fop_data_release(fop);
if (func)
- func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
}
int
ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
{
ec_t *ec = opaque;
+ gf_boolean_t last_fop = _gf_false;
+ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+ LOCK(&ec->lock);
+ {
+ last_fop = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+
+ if (last_fop)
+ ec_pending_fops_completed(ec);
+
return 0;
}
@@ -2777,6 +2976,10 @@ ec_replace_brick_heal_wrap(void *opaque)
itable = ec->xl->itable;
else
goto out;
+
+ if (xlator_is_cleanup_starting(ec->xl))
+ goto out;
+
ret = ec_replace_heal(ec, itable->root);
out:
return ret;
@@ -2787,14 +2990,15 @@ ec_launch_replace_heal(ec_t *ec)
{
int ret = -1;
- if (!ec)
- return ret;
ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
ec_replace_heal_done, NULL, ec);
+
if (ret < 0) {
gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
ret);
+ ec_replace_heal_done(-1, NULL, ec);
}
+
return ret;
}
@@ -2826,7 +3030,7 @@ out:
static int32_t
_need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
gf_boolean_t self_locked, int32_t lock_count,
- ec_heal_need_t *need_heal)
+ ec_heal_need_t *need_heal, uint64_t *versions)
{
int i = 0;
int source_count = 0;
@@ -2836,11 +3040,18 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_NONEED;
if (self_locked || lock_count == 0) {
for (i = 0; i < ec->nodes; i++) {
- if (dirty[i]) {
+ if (dirty[i] || (versions[i] != versions[0])) {
*need_heal = EC_HEAL_MUST;
goto out;
}
}
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are macthing then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
} else {
for (i = 0; i < ec->nodes; i++) {
/* Since each lock can only increment the dirty
@@ -2852,6 +3063,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_MUST;
goto out;
}
+ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MAYBE;
+ }
}
}
} else {
@@ -2872,7 +3086,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
unsigned char *healed_sinks = NULL;
uint64_t *meta_versions = NULL;
int ret = 0;
- int i = 0;
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
@@ -2885,15 +3098,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
- if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
- for (i = 1; i < ec->nodes; i++) {
- if (meta_versions[i] != meta_versions[0]) {
- *need_heal = EC_HEAL_MUST;
- goto out;
- }
- }
- }
+ need_heal, meta_versions);
out:
return ret;
}
@@ -2929,7 +3134,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -2957,7 +3162,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -3055,10 +3260,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
need_heal:
ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
need_heal);
-
- if (!self_locked && *need_heal == EC_HEAL_MUST) {
- *need_heal = EC_HEAL_MAYBE;
- }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -3144,7 +3345,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
_gf_false, &need_heal);
- if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
+ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
goto set_heal;
}
need_heal = EC_HEAL_NONEED;
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index cba111a3e8f..5c1586bc9c5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
#include <glusterfs/defaults.h>
#include <glusterfs/compat-errno.h>
#include "ec.h"
@@ -63,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer)
ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + 60;
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -71,6 +70,11 @@ disabled_loop:
break;
}
+ if (ec->shutdown) {
+ healer->running = _gf_false;
+ return -1;
+ }
+
ret = healer->rerun;
healer->rerun = 0;
@@ -152,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
- * other index entries have appeared to be healed. We put a
- * mark so that we can check it later and restart a scan
- * without delay. */
- healer->rerun = _gf_true;
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (dict) {
+ dict_unref(dict);
}
return ret;
@@ -241,9 +304,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
goto out;
}
+ _mask_cancellation();
ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
healer, ec_shd_index_heal, xdata,
ec->shd.max_threads, ec->shd.wait_qlength);
+ _unmask_cancellation();
out:
if (xdata)
dict_unref(xdata);
@@ -263,6 +328,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int ret = 0;
ec = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
if (ec->xl_up_count <= ec->fragments) {
return -ENOTCONN;
}
@@ -305,11 +375,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
ec_t *ec = NULL;
loc_t loc = {0};
+ int ret = -1;
ec = healer->this->private;
loc.inode = inode;
- return syncop_ftw(ec->xl_list[healer->subvol], &loc,
- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _mask_cancellation();
+ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _unmask_cancellation();
+ return ret;
}
void *
@@ -317,13 +391,16 @@ ec_shd_index_healer(void *data)
{
struct subvol_healer *healer = NULL;
xlator_t *this = NULL;
+ int run = 0;
healer = data;
THIS = this = healer->this;
ec_t *ec = this->private;
for (;;) {
- ec_shd_healer_wait(healer);
+ run = ec_shd_healer_wait(healer);
+ if (run == -1)
+ break;
if (ec->xl_up_count > ec->fragments) {
gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +429,12 @@ ec_shd_full_healer(void *data)
rootloc.inode = this->itable->root;
for (;;) {
- pthread_mutex_lock(&healer->mutex);
- {
- run = __ec_shd_healer_wait(healer);
- if (!run)
- healer->running = _gf_false;
- }
- pthread_mutex_unlock(&healer->mutex);
-
- if (!run)
+ run = ec_shd_healer_wait(healer);
+ if (run < 0) {
break;
+ } else if (run == 0) {
+ continue;
+ }
if (ec->xl_up_count > ec->fragments) {
gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -429,6 +502,9 @@ unlock:
int
ec_shd_full_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
ec_shd_full_healer);
}
@@ -436,6 +512,9 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
int
ec_shd_index_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
ec_shd_index_healer);
}
@@ -562,3 +641,41 @@ out:
dict_del(output, this->name);
return ret;
}
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ if (!healer)
+ return;
+
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ ec_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->nodes; i++) {
+ healer = &shd->index_healers[i];
+ ec_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ ec_destroy_healer_object(this, healer);
+ }
+
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 2eda2a74f54..6c7da4edc10 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -11,9 +11,9 @@
#ifndef __EC_HEALD_H__
#define __EC_HEALD_H__
-#include <glusterfs/xlator.h>
-
-#include "ec-types.h"
+#include "ec-types.h" // for ec_t
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/globals.h" // for xlator_t
int
ec_xl_op(xlator_t *this, dict_t *input, dict_t *output);
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
void
ec_shd_index_healer_wake(ec_t *ec);
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index e6b0359bd6f..48f54475e01 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -476,7 +476,7 @@ out:
int32_t
ec_loc_setup_path(xlator_t *xl, loc_t *loc)
{
- uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *name;
int32_t ret = -EINVAL;
@@ -717,6 +717,7 @@ __ec_inode_get(inode_t *inode, xlator_t *xl)
memset(ctx, 0, sizeof(*ctx));
INIT_LIST_HEAD(&ctx->heal);
INIT_LIST_HEAD(&ctx->stripe_cache.lru);
+ ctx->heal_count = 0;
value = (uint64_t)(uintptr_t)ctx;
if (__inode_ctx_set(inode, xl, &value) != 0) {
GF_FREE(ctx);
@@ -752,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
{
int i = 0;
ec_fd_t *ctx = NULL;
+ ec_inode_t *ictx = NULL;
uint64_t value = 0;
ec_t *ec = xl->private;
@@ -774,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
GF_FREE(ctx);
return NULL;
}
+ /* Only refering bad-version so no need for lock
+ * */
+ ictx = __ec_inode_get(fd->inode, xl);
+ if (ictx) {
+ ctx->bad_version = ictx->bad_version;
+ }
}
} else {
ctx = (ec_fd_t *)(uintptr_t)value;
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 55e59345ab0..dad5f4d7018 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -8,9 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -135,7 +132,7 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state)
void
ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
int32_t mask, dict_t *xdata)
{
ec_cbk_t callback = {.access = func};
@@ -149,7 +146,7 @@ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_access,
+ target, fop_flags, ec_wind_access,
ec_manager_access, callback, data);
if (fop == NULL) {
goto out;
@@ -393,15 +390,34 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
int32_t
ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- fop_getxattr_cbk_t func = fop->data;
+ fop_getxattr_cbk_t func = cookie;
ec_t *ec = xl->private;
dict_t *dict = NULL;
char *str;
char bin1[65], bin2[65];
+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
if (op_ret >= 0) {
dict = dict_new();
if (dict == NULL) {
@@ -435,18 +451,21 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
}
out:
- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
if (dict != NULL) {
dict_unref(dict);
}
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
return 0;
}
void
ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.getxattr = func};
@@ -468,7 +487,7 @@ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
}
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_getxattr, ec_manager_getxattr, callback, data);
if (fop == NULL) {
goto out;
@@ -588,7 +607,7 @@ ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.fgetxattr = func};
@@ -602,7 +621,7 @@ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_fgetxattr, ec_manager_getxattr, callback, data);
if (fop == NULL) {
goto out;
@@ -774,13 +793,15 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
- err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
- if (err != 0) {
- UNLOCK(&fop->fd->lock);
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
- fop->error = -err;
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
ctx->flags = fop->int32;
@@ -869,9 +890,9 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state)
}
void
-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
- dict_t *xdata)
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.open = func};
ec_fop_data_t *fop = NULL;
@@ -884,7 +905,7 @@ ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_open, ec_manager_open,
+ target, fop_flags, ec_wind_open, ec_manager_open,
callback, data);
if (fop == NULL) {
goto out;
@@ -1071,7 +1092,7 @@ ec_manager_readlink(ec_fop_data_t *fop, int32_t state)
void
ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
size_t size, dict_t *xdata)
{
ec_cbk_t callback = {.readlink = func};
@@ -1085,7 +1106,7 @@ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_readlink, ec_manager_readlink, callback, data);
if (fop == NULL) {
goto out;
@@ -1331,6 +1352,7 @@ int32_t
ec_manager_readv(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ ec_t *ec = fop->xl->private;
switch (state) {
case EC_STATE_INIT:
@@ -1350,6 +1372,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
+ if (ec->read_mask) {
+ fop->mask &= ec->read_mask;
+ }
ec_dispatch_min(fop);
return EC_STATE_PREPARE_ANSWER;
@@ -1417,9 +1442,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
}
void
-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
- uint32_t flags, dict_t *xdata)
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.readv = func};
ec_fop_data_t *fop = NULL;
@@ -1432,8 +1457,8 @@ ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_readv, ec_manager_readv,
- callback, data);
+ target, fop_flags, ec_wind_readv,
+ ec_manager_readv, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1637,9 +1662,9 @@ ec_manager_seek(ec_fop_data_t *fop, int32_t state)
}
void
-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata)
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata)
{
ec_cbk_t callback = {.seek = func};
ec_fop_data_t *fop = NULL;
@@ -1652,7 +1677,7 @@ ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_seek, ec_manager_seek,
+ target, fop_flags, ec_wind_seek, ec_manager_seek,
callback, data);
if (fop == NULL) {
goto out;
@@ -1855,8 +1880,9 @@ ec_manager_stat(ec_fop_data_t *fop, int32_t state)
}
void
-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata)
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
{
ec_cbk_t callback = {.stat = func};
ec_fop_data_t *fop = NULL;
@@ -1869,7 +1895,7 @@ ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_stat, ec_manager_stat,
+ target, fop_flags, ec_wind_stat, ec_manager_stat,
callback, data);
if (fop == NULL) {
goto out;
@@ -1965,8 +1991,9 @@ ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
}
void
-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
ec_cbk_t callback = {.fstat = func};
ec_fop_data_t *fop = NULL;
@@ -1979,8 +2006,8 @@ ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_fstat, ec_manager_stat,
- callback, data);
+ target, fop_flags, ec_wind_fstat,
+ ec_manager_stat, callback, data);
if (fop == NULL) {
goto out;
}
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index e7b34e67e10..9b5fe2a7fdc 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -8,10 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-
-#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
@@ -89,6 +85,8 @@ ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size)
goto out;
}
+ if (fop->locks[0].lock)
+ ec_lock_update_good(fop->locks[0].lock, fop);
vector.iov_base = iobuf->ptr;
vector.iov_len = size;
memset(vector.iov_base, 0, vector.iov_len);
@@ -183,26 +181,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
switch (fop->id) {
case GF_FOP_SETXATTR:
if (fop->cbks.setxattr) {
- fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
+ op_errno, xdata);
}
break;
case GF_FOP_REMOVEXATTR:
if (fop->cbks.removexattr) {
- fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
case GF_FOP_FSETXATTR:
if (fop->cbks.fsetxattr) {
- fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
case GF_FOP_FREMOVEXATTR:
if (fop->cbks.fremovexattr) {
- fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
}
@@ -281,7 +279,7 @@ ec_manager_xattr(ec_fop_data_t *fop, int32_t state)
void
ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
loc_t *loc, const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.removexattr = func};
@@ -295,7 +293,7 @@ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
- minimum, ec_wind_removexattr, ec_manager_xattr,
+ fop_flags, ec_wind_removexattr, ec_manager_xattr,
callback, data);
if (fop == NULL) {
goto out;
@@ -361,7 +359,7 @@ ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
fd_t *fd, const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.fremovexattr = func};
@@ -375,8 +373,8 @@ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
- minimum, ec_wind_fremovexattr, ec_manager_xattr,
- callback, data);
+ fop_flags, ec_wind_fremovexattr,
+ ec_manager_xattr, callback, data);
if (fop == NULL) {
goto out;
}
@@ -492,16 +490,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
if (fop->id == GF_FOP_SETATTR) {
if (fop->cbks.setattr != NULL) {
- fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0],
- &cbk->iatt[1], cbk->xdata);
+ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
} else {
if (fop->cbks.fsetattr != NULL) {
- fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -550,7 +547,7 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
void
ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
ec_cbk_t callback = {.setattr = func};
@@ -563,9 +560,9 @@ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, minimum,
- ec_wind_setattr, ec_manager_setattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target,
+ fop_flags, ec_wind_setattr, ec_manager_setattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -627,7 +624,7 @@ ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
ec_cbk_t callback = {.fsetattr = func};
@@ -640,9 +637,9 @@ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, minimum,
- ec_wind_fsetattr, ec_manager_setattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
+ fop_flags, ec_wind_fsetattr, ec_manager_setattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -707,7 +704,7 @@ ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
dict_t *dict, int32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.setxattr = func};
@@ -720,9 +717,9 @@ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, minimum,
- ec_wind_setxattr, ec_manager_xattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
+ fop_flags, ec_wind_setxattr, ec_manager_xattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -825,7 +822,7 @@ ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
dict_t *dict, int32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.fsetxattr = func};
@@ -839,7 +836,7 @@ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
- minimum, ec_wind_fsetxattr, ec_manager_xattr,
+ fop_flags, ec_wind_fsetxattr, ec_manager_xattr,
callback, data);
if (fop == NULL) {
goto out;
@@ -992,9 +989,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.fallocate != NULL) {
- fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1035,7 +1032,7 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
void
ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata)
{
ec_cbk_t callback = {.fallocate = func};
@@ -1049,8 +1046,8 @@ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
- minimum, ec_wind_fallocate, ec_manager_fallocate,
- callback, data);
+ fop_flags, ec_wind_fallocate,
+ ec_manager_fallocate, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1209,8 +1206,8 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
ec_dispatch_all(fop);
return EC_STATE_DELAYED_START;
} else {
- /*Assume discard to have succeeded on mask*/
- fop->good = fop->mask;
+ /* Assume discard to have succeeded on all bricks */
+ ec_succeed_all(fop);
}
/* Fall through */
@@ -1245,9 +1242,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.discard != NULL) {
- fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1289,7 +1286,7 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
void
ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
off_t offset, size_t len, dict_t *xdata)
{
ec_cbk_t callback = {.discard = func};
@@ -1302,9 +1299,9 @@ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, minimum,
- ec_wind_discard, ec_manager_discard, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+ fop_flags, ec_wind_discard, ec_manager_discard,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1405,6 +1402,7 @@ int32_t
ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ off_t offset_down;
switch (state) {
case EC_STATE_INIT:
@@ -1416,16 +1414,19 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
+ offset_down = fop->user_size;
+ ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true);
+
if (fop->id == GF_FOP_TRUNCATE) {
ec_lock_prepare_inode(
fop, &fop->loc[0],
EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
- fop->offset, EC_RANGE_FULL);
+ offset_down, EC_RANGE_FULL);
} else {
ec_lock_prepare_fd(
fop, fop->fd,
EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
- fop->offset, EC_RANGE_FULL);
+ offset_down, EC_RANGE_FULL);
}
ec_lock(fop);
@@ -1471,17 +1472,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
if (fop->id == GF_FOP_TRUNCATE) {
if (fop->cbks.truncate != NULL) {
- fop->cbks.truncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
} else {
if (fop->cbks.ftruncate != NULL) {
- fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -1530,7 +1529,7 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
void
ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.truncate = func};
@@ -1543,9 +1542,9 @@ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, minimum,
- ec_wind_truncate, ec_manager_truncate, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
+ fop_flags, ec_wind_truncate, ec_manager_truncate,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1604,7 +1603,7 @@ ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.ftruncate = func};
@@ -1618,8 +1617,8 @@ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
- minimum, ec_wind_ftruncate, ec_manager_truncate,
- callback, data);
+ fop_flags, ec_wind_ftruncate,
+ ec_manager_truncate, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1973,6 +1972,23 @@ ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which)
return found;
}
+static uintptr_t
+ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
+{
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ictx = NULL;
+ LOCK(&inode->lock);
+ {
+ ictx = __ec_inode_get(inode, xl);
+ if (ictx)
+ lock = ictx->inode_lock;
+ }
+ UNLOCK(&inode->lock);
+ if (lock)
+ return lock->good_mask;
+ return 0;
+}
+
void
ec_writev_start(ec_fop_data_t *fop)
{
@@ -2009,20 +2025,29 @@ ec_writev_start(ec_fop_data_t *fop)
if (err != 0) {
goto failed_fd;
}
+ tail = fop->size - fop->user_size - fop->head;
if (fop->head > 0) {
- found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
- if (!found_stripe) {
- if (ec_make_internal_fop_xdata(&xdata)) {
- err = -ENOMEM;
- goto failed_xdata;
+ if (current > fop->offset) {
+ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
+ if (!found_stripe) {
+ if (ec_make_internal_fop_xdata(&xdata)) {
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd,
+ ec->stripe_size, fop->offset, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base, 0, fop->head);
+ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (ec->stripe_cache && (fop->size <= ec->stripe_size)) {
+ ec_add_stripe_in_cache(ec, fop);
}
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
- ec_writev_merge_head, NULL, fd, ec->stripe_size,
- fop->offset, 0, xdata);
}
}
- tail = fop->size - fop->user_size - fop->head;
if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
/* Current locking scheme will make sure the 'current' below will
* never decrease while the fop is in progress, so the checks will
@@ -2035,8 +2060,10 @@ ec_writev_start(ec_fop_data_t *fop)
err = -ENOMEM;
goto failed_xdata;
}
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
- ec_writev_merge_tail, NULL, fd, ec->stripe_size,
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd,
+ ec->stripe_size,
fop->offset + fop->size - ec->stripe_size, 0, xdata);
}
} else {
@@ -2211,9 +2238,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.writev != NULL) {
- fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -2262,7 +2289,7 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
void
ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata)
{
@@ -2276,7 +2303,7 @@ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags,
ec_wind_writev, ec_manager_writev, callback,
data);
if (fop == NULL) {
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index f978af0ac67..601960d6154 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -8,13 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/xlator.h>
-#include <glusterfs/defaults.h>
-
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
#include "ec-messages.h"
@@ -28,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
ec_t *ec = fop->xl->private;
ec_cbk_data_t *ans = NULL;
ec_cbk_data_t *cbk = NULL;
- uintptr_t locked = 0, notlocked = 0;
+ uintptr_t locked = 0;
+ int32_t good = 0;
+ int32_t eagain = 0;
+ int32_t estale = 0;
int32_t error = -1;
+ /* There are some errors that we'll handle in an special way while trying
+ * to acquire a lock.
+ *
+ * EAGAIN: If it's found during a parallel non-blocking lock request, we
+ * consider that there's contention on the inode, so we consider
+ * the acquisition a failure and try again with a sequential
+ * blocking lock request. This will ensure that we get a lock on
+ * as many bricks as possible (ignoring EAGAIN here would cause
+ * unnecessary triggers of self-healing).
+ *
+ * If it's found during a sequential blocking lock request, it's
+ * considered an error. Lock will only succeed if there are
+ * enough other bricks locked.
+ *
+ * ESTALE: This can appear during parallel or sequential lock request if
+ * the inode has just been unlinked. We consider this error is
+ * not recoverable, but we also don't consider it as fatal. So,
+ * if it happens during parallel lock, we won't attempt a
+ * sequential one unless there are EAGAIN errors on other
+ * bricks (and are enough to form a quorum), but if we reach
+ * quorum counting the ESTALE bricks, we consider the whole
+ * result of the operation is ESTALE instead of EIO.
+ */
+
list_for_each_entry(ans, &fop->cbk_list, list)
{
if (ans->op_ret >= 0) {
@@ -38,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
error = EIO;
}
locked |= ans->mask;
+ good = ans->count;
cbk = ans;
- } else {
- if (ans->op_errno == EAGAIN) {
- switch (fop->uint32) {
- case EC_LOCK_MODE_NONE:
- case EC_LOCK_MODE_ALL:
- /* Goal is to treat non-blocking lock as failure
- * even if there is a single EAGAIN*/
- notlocked |= ans->mask;
- break;
- }
- }
+ } else if (ans->op_errno == ESTALE) {
+ estale += ans->count;
+ } else if ((ans->op_errno == EAGAIN) &&
+ (fop->uint32 != EC_LOCK_MODE_INC)) {
+ eagain += ans->count;
}
}
if (error == -1) {
- if (gf_bits_count(locked | notlocked) >= ec->fragments) {
- if (notlocked == 0) {
+ /* If we have enough quorum with succeeded and EAGAIN answers, we
+ * ignore for now any ESTALE answer. If there are EAGAIN answers,
+ * we retry with a sequential blocking lock request if needed.
+ * Otherwise we succeed. */
+ if ((good + eagain) >= ec->fragments) {
+ if (eagain == 0) {
if (fop->answer == NULL) {
fop->answer = cbk;
}
@@ -68,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
case EC_LOCK_MODE_NONE:
error = EAGAIN;
break;
-
case EC_LOCK_MODE_ALL:
fop->uint32 = EC_LOCK_MODE_INC;
break;
-
default:
+ /* This shouldn't happen because eagain cannot be > 0
+ * when fop->uint32 is EC_LOCK_MODE_INC. */
error = EIO;
break;
}
}
} else {
- if (fop->answer && fop->answer->op_ret < 0)
+ /* We have been unable to find enough candidates that will be able
+ * to take the lock. If we have quorum on some answer, we return
+ * it. Otherwise we check if ESTALE answers allow us to reach
+ * quorum. If so, we return ESTALE. */
+ if (fop->answer && fop->answer->op_ret < 0) {
error = fop->answer->op_errno;
- else
+ } else if ((good + eagain + estale) >= ec->fragments) {
+ error = ESTALE;
+ } else {
error = EIO;
+ }
}
}
@@ -275,7 +304,7 @@ ec_manager_entrylk(ec_fop_data_t *fop, int32_t state)
void
ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
const char *volume, loc_t *loc, const char *basename,
entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
@@ -288,9 +317,9 @@ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, minimum,
- ec_wind_entrylk, ec_manager_entrylk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target,
+ fop_flags, ec_wind_entrylk, ec_manager_entrylk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -403,7 +432,7 @@ ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
@@ -416,9 +445,9 @@ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, minimum,
- ec_wind_fentrylk, ec_manager_entrylk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target,
+ fop_flags, ec_wind_fentrylk, ec_manager_entrylk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -650,7 +679,7 @@ ec_manager_inodelk(ec_fop_data_t *fop, int32_t state)
void
ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
void *data, const char *volume, loc_t *loc, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -664,9 +693,9 @@ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, minimum,
- ec_wind_inodelk, ec_manager_inodelk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target,
+ fop_flags, ec_wind_inodelk, ec_manager_inodelk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -782,7 +811,7 @@ ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
void *data, const char *volume, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -796,9 +825,9 @@ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, minimum,
- ec_wind_finodelk, ec_manager_inodelk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target,
+ fop_flags, ec_wind_finodelk, ec_manager_inodelk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1032,7 +1061,7 @@ ec_manager_lk(ec_fop_data_t *fop, int32_t state)
}
void
-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -1045,7 +1074,7 @@ ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags,
ec_wind_lk, ec_manager_lk, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 7c2880851a8..72e98f11286 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+ EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD);
#endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec-method.h b/xlators/cluster/ec/src/ec-method.h
index 2489fc84226..f91233b2f88 100644
--- a/xlators/cluster/ec/src/ec-method.h
+++ b/xlators/cluster/ec/src/ec-method.h
@@ -11,8 +11,6 @@
#ifndef __EC_METHOD_H__
#define __EC_METHOD_H__
-#include <glusterfs/xlator.h>
-
#include "ec-types.h"
#include "ec-galois.h"
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index f3d63ca09ce..de9b89bb2c9 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};
enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
@@ -150,6 +155,7 @@ struct _ec_fd {
loc_t loc;
uintptr_t open;
int32_t flags;
+ uint64_t bad_version;
ec_fd_status_t fd_status[0];
};
@@ -171,6 +177,7 @@ struct _ec_inode {
gf_boolean_t have_config;
gf_boolean_t have_version;
gf_boolean_t have_size;
+ int32_t heal_count;
ec_config_t config;
uint64_t pre_version[2];
uint64_t post_version[2];
@@ -179,14 +186,15 @@ struct _ec_inode {
uint64_t dirty[2];
struct list_head heal;
ec_stripe_list_t stripe_cache;
+ uint64_t bad_version;
};
typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
union _ec_cbk {
fop_access_cbk_t access;
@@ -264,6 +272,7 @@ struct _ec_lock {
uint32_t refs_pending; /* Refs assigned to fops being prepared */
uint32_t waiting_flags; /*Track xattrop/dirty marking*/
gf_boolean_t acquired;
+ gf_boolean_t contention;
gf_boolean_t unlock_now;
gf_boolean_t release;
gf_boolean_t query;
@@ -307,9 +316,9 @@ struct _ec_fop_data {
int32_t id; /* ID of the file operation */
int32_t refs;
int32_t state;
- int32_t minimum; /* Minimum number of successful
- operation required to conclude a
- fop as successful */
+ uint32_t minimum; /* Minimum number of successful
+ operation required to conclude a
+ fop as successful */
int32_t expected;
int32_t winds;
int32_t jobs;
@@ -324,11 +333,12 @@ struct _ec_fop_data {
ec_cbk_data_t *answer; /* accepted answer */
int32_t lock_count;
int32_t locked;
+ gf_lock_t lock;
ec_lock_link_t locks[2];
int32_t first_lock;
- gf_lock_t lock;
- uint32_t flags;
+ uint32_t fop_flags; /* Flags passed by the caller. */
+ uint32_t flags; /* Internal flags. */
uint32_t first;
uintptr_t mask;
uintptr_t healing; /*Dispatch is done but call is successful only
@@ -616,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals complted on files/directories*/
+ } shd;
};
struct _ec {
@@ -641,6 +656,8 @@ struct _ec {
uintptr_t xl_notify; /* Bit flag representing
notification for bricks. */
uintptr_t node_mask;
+ uintptr_t read_mask; /*Stores user defined read-mask*/
+ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
xlator_t **xl_list;
gf_lock_t lock;
gf_timer_t *timer;
@@ -650,6 +667,7 @@ struct _ec {
gf_boolean_t optimistic_changelog;
gf_boolean_t parallel_writes;
uint32_t stripe_cache;
+ uint32_t quorum_count;
uint32_t background_heals;
uint32_t heal_wait_qlen;
uint32_t self_heal_window_size; /* max size of read/writes */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 13ffeb96012..7344be4968d 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
failed);
GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
+ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
ret = 0;
if (ec_assign_read_policy(ec, read_policy)) {
ret = -1;
@@ -324,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
void
ec_up(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 1;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -338,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
void
ec_down(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 0;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -355,6 +366,7 @@ ec_notify_cbk(void *data)
ec_t *ec = data;
glusterfs_event_t event = GF_EVENT_MAXVAL;
gf_boolean_t propagate = _gf_false;
+ gf_boolean_t launch_heal = _gf_false;
LOCK(&ec->lock);
{
@@ -384,6 +396,11 @@ ec_notify_cbk(void *data)
* still bricks DOWN, they will be healed when they
* come up. */
ec_up(ec->xl, ec);
+
+ if (ec->shd.iamshd && !ec->shutdown) {
+ launch_heal = _gf_true;
+ GF_ATOMIC_INC(ec->async_fop_count);
+ }
}
propagate = _gf_true;
@@ -391,13 +408,12 @@ ec_notify_cbk(void *data)
unlock:
UNLOCK(&ec->lock);
+ if (launch_heal) {
+ /* We have just brought the volume UP, so we trigger
+ * a self-heal check on the root directory. */
+ ec_launch_replace_heal(ec);
+ }
if (propagate) {
- if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
- /* We have just brought the volume UP, so we trigger
- * a self-heal check on the root directory. */
- ec_launch_replace_heal(ec);
- }
-
default_notify(ec->xl, event, NULL);
}
}
@@ -425,10 +441,55 @@ ec_disable_delays(ec_t *ec)
{
ec->shutdown = _gf_true;
- return list_empty(&ec->pending_fops);
+ return __ec_is_last_fop(ec);
}
void
+ec_cleanup_healer_object(ec_t *ec)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ void *res = NULL;
+ int i = 0;
+ gf_boolean_t is_join = _gf_false;
+
+ shd = &ec->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < ec->nodes; i++) {
+ healer = &shd->index_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+
+ healer = &shd->full_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+ }
+}
+void
ec_pending_fops_completed(ec_t *ec)
{
if (ec->shutdown) {
@@ -441,6 +502,9 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
{
uintptr_t current_state = 0;
+ if (xlator_is_cleanup_starting(ec->xl))
+ return _gf_false;
+
if ((ec->xl_notify & index_mask) == 0) {
ec->xl_notify |= index_mask;
ec->xl_notify_count++;
@@ -462,6 +526,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
struct gf_upcall_cache_invalidation *ci = NULL;
struct gf_upcall_inodelk_contention *lc = NULL;
inode_t *inode;
+ inode_table_t *table;
switch (upcall->event_type) {
case GF_UPCALL_CACHE_INVALIDATION:
@@ -475,8 +540,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
/* The lock is not owned by EC, ignore it. */
return _gf_true;
}
- inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
- upcall->gfid);
+ table = ((xlator_t *)ec->xl->graph->top)->itable;
+ if (table == NULL) {
+ /* Self-heal daemon doesn't have an inode table on the top
+ * xlator because it doesn't need it. In this case we should
+ * use the inode table managed by EC itself where all inodes
+ * being healed should be present. However self-heal doesn't
+ * use eager-locking and inodelk's are already released as
+ * soon as possible. In this case we can safely ignore these
+ * notifications. */
+ return _gf_false;
+ }
+ inode = inode_find(table, upcall->gfid);
/* If inode is not found, it means that it's already released,
* so we can ignore it. Probably it has been released and
* destroyed while the contention notification was being sent.
@@ -544,6 +619,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
/* If there aren't pending fops running after we have waken up
* them, we immediately propagate the notification. */
propagate = ec_disable_delays(ec);
+ ec_cleanup_healer_object(ec);
goto unlock;
}
@@ -554,7 +630,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
if (event == GF_EVENT_CHILD_UP) {
/* We need to trigger a selfheal if a brick changes
* to UP state. */
- needs_shd_check = ec_set_up_state(ec, mask, mask);
+ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+ !ec->shutdown) {
+ needs_shd_check = _gf_true;
+ }
} else if (event == GF_EVENT_CHILD_DOWN) {
ec_set_up_state(ec, mask, 0);
}
@@ -584,17 +663,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
}
} else {
propagate = _gf_false;
+ needs_shd_check = _gf_false;
+ }
+
+ if (needs_shd_check) {
+ GF_ATOMIC_INC(ec->async_fop_count);
}
}
unlock:
UNLOCK(&ec->lock);
done:
+ if (needs_shd_check) {
+ ec_launch_replace_heal(ec);
+ }
if (propagate) {
- if (needs_shd_check && ec->shd.iamshd) {
- ec_launch_replace_heal(ec);
- }
-
error = default_notify(this, event, data);
}
@@ -627,6 +710,69 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
+}
+
+static int
+ec_assign_read_mask(ec_t *ec, char *read_mask_str)
+{
+ char *mask = NULL;
+ char *maskptr = NULL;
+ char *saveptr = NULL;
+ char *id_str = NULL;
+ int id = 0;
+ int ret = 0;
+ uintptr_t read_mask = 0;
+
+ if (!read_mask_str) {
+ ec->read_mask = 0;
+ ret = 0;
+ goto out;
+ }
+
+ mask = gf_strdup(read_mask_str);
+ if (!mask) {
+ ret = -1;
+ goto out;
+ }
+ maskptr = mask;
+
+ for (;;) {
+ id_str = strtok_r(maskptr, ":", &saveptr);
+ if (id_str == NULL)
+ break;
+ if (gf_string2int(id_str, &id)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %s is not a valid integer",
+ read_mask_str, id_str);
+ ret = -1;
+ goto out;
+ }
+
+ if ((id < 0) || (id >= ec->nodes)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %d is not in range [0 - %d]",
+ read_mask_str, id, ec->nodes - 1);
+ ret = -1;
+ goto out;
+ }
+ read_mask |= (1UL << id);
+ maskptr = NULL;
+ }
+
+ if (gf_bits_count(read_mask) < ec->fragments) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "read-mask \"%s\" should contain at least %d ids", read_mask_str,
+ ec->fragments);
+ ret = -1;
+ goto out;
+ }
+ ec->read_mask = read_mask;
+ ret = 0;
+out:
+ GF_FREE(mask);
+ return ret;
}
int32_t
@@ -636,6 +782,7 @@ init(xlator_t *this)
char *read_policy = NULL;
char *extensions = NULL;
int32_t err;
+ char *read_mask_str = NULL;
if (this->parents == NULL) {
gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS,
@@ -656,6 +803,7 @@ init(xlator_t *this)
ec->xl = this;
LOCK_INIT(&ec->lock);
+ GF_ATOMIC_INIT(ec->async_fop_count, 0);
INIT_LIST_HEAD(&ec->pending_fops);
INIT_LIST_HEAD(&ec->heal_waiting);
INIT_LIST_HEAD(&ec->healing);
@@ -714,12 +862,18 @@ init(xlator_t *this)
if (ec_assign_read_policy(ec, read_policy))
goto failed;
+ GF_OPTION_INIT("heal-timeout", ec->shd.timeout, int32, failed);
GF_OPTION_INIT("shd-max-threads", ec->shd.max_threads, uint32, failed);
GF_OPTION_INIT("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
GF_OPTION_INIT("optimistic-change-log", ec->optimistic_changelog, bool,
failed);
GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
+ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+ GF_OPTION_INIT("ec-read-mask", read_mask_str, str, failed);
+
+ if (ec_assign_read_mask(ec, read_mask_str))
+ goto failed;
this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
if (!this->itable)
@@ -759,6 +913,7 @@ failed:
void
fini(xlator_t *this)
{
+ ec_selfheal_daemon_fini(this);
__ec_destroy_private(this);
}
@@ -797,11 +952,12 @@ ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
if (cmd == ENTRYLK_UNLOCK)
- minimum = EC_MINIMUM_ONE;
- ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, volume, loc,
- basename, cmd, type, xdata);
+ fop_flags = EC_MINIMUM_ONE;
+ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume,
+ loc, basename, cmd, type, xdata);
return 0;
}
@@ -811,10 +967,11 @@ ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
if (cmd == ENTRYLK_UNLOCK)
- minimum = EC_MINIMUM_ONE;
- ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, volume,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume,
fd, basename, cmd, type, xdata);
return 0;
@@ -905,7 +1062,7 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
{
int error = 0;
ec_t *ec = this->private;
- int32_t minimum = EC_MINIMUM_ONE;
+ int32_t fop_flags = EC_MINIMUM_ONE;
if (name && strcmp(name, EC_XATTR_HEAL) != 0) {
EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
@@ -920,11 +1077,11 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) ||
XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) {
- minimum = EC_MINIMUM_ALL;
+ fop_flags = EC_MINIMUM_ALL;
}
- ec_getxattr(frame, this, -1, minimum, default_getxattr_cbk, NULL, loc, name,
- xdata);
+ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc,
+ name, xdata);
return 0;
out:
@@ -954,11 +1111,12 @@ int32_t
ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
+ fop_flags = EC_MINIMUM_ONE;
- ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum,
+ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata);
return 0;
@@ -968,10 +1126,11 @@ int32_t
ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
- ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata);
return 0;
@@ -991,10 +1150,11 @@ int32_t
ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
- ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, flock,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock,
xdata);
return 0;
@@ -1389,6 +1549,10 @@ ec_dump_private(xlator_t *this)
gf_proc_dump_write("childs_up", "%u", ec->xl_up_count);
gf_proc_dump_write("childs_up_mask", "%s",
ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes));
+ if (ec->read_mask) {
+ gf_proc_dump_write("read-mask", "%s",
+ ec_bin(tmp, sizeof(tmp), ec->read_mask, ec->nodes));
+ }
gf_proc_dump_write("background-heals", "%d", ec->background_heals);
gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen);
gf_proc_dump_write("self-heal-window-size", "%" PRIu32,
@@ -1397,6 +1561,7 @@ ec_dump_private(xlator_t *this)
gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
+ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
this->type, this->name);
@@ -1416,6 +1581,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
@@ -1667,6 +1836,23 @@ struct volume_options options[] = {
"lead to extra memory consumption, maximum "
"(cache size * stripe size) Bytes per open file."},
{
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description =
+ "This option can be used to define how many successes on"
+ "the bricks constitute a success to the application. This"
+ " count should be in the range"
+ "[disperse-data-count, disperse-count] (inclusive)",
+ },
+ {
+ .key = {"ec-read-mask"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = NULL,
+ .description = "This option can be used to choose which bricks can be"
+ " used for reading data/metadata of a file/directory",
+ },
+ {
.key = {NULL},
},
};
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index 1b210d9adc1..6f6de6d5981 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -18,6 +18,7 @@
#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
#define EC_STRIPE_CACHE_MAX_SIZE 10
#define EC_VERSION_SIZE 2