Diffstat (limited to 'xlators/cluster/ec/src/ec-common.c')
-rw-r--r--  xlators/cluster/ec/src/ec-common.c | 1785
1 file changed, 882 insertions(+), 903 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 86432bd7da7..0eee0a3363f 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -24,29 +24,28 @@
#define EC_INVALID_INDEX UINT32_MAX
void
-ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
- int32_t ret_status)
+ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status)
{
- ec_fd_t *fd_ctx;
+ ec_fd_t *fd_ctx;
- if (fd == NULL)
- return;
+ if (fd == NULL)
+ return;
- LOCK (&fd->lock);
- {
- fd_ctx = __ec_fd_get(fd, xl);
- if (fd_ctx) {
- if (ret_status >= 0)
- fd_ctx->fd_status[idx] = EC_FD_OPENED;
- else
- fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED;
- }
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ if (ret_status >= 0)
+ fd_ctx->fd_status[idx] = EC_FD_OPENED;
+ else
+ fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED;
}
- UNLOCK (&fd->lock);
+ }
+ UNLOCK(&fd->lock);
}
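
The function above shows the pattern this file uses for per-brick fd state: an array of open states indexed by brick, mutated only while holding the fd lock. A minimal standalone sketch of the same idea, using pthreads and made-up names (BRICK_COUNT, tracked_fd_t) that are not part of the GlusterFS tree:

#include <pthread.h>

typedef enum { FD_NOT_OPENED, FD_OPENING, FD_OPENED } fd_state_t;

#define BRICK_COUNT 6 /* hypothetical subvolume count */

typedef struct {
    pthread_mutex_t lock;
    fd_state_t status[BRICK_COUNT];
} tracked_fd_t;

/* Record the outcome of an open attempt on one brick; like
 * ec_update_fd_status above, success marks OPENED, failure NOT_OPENED. */
static void
tracked_fd_update(tracked_fd_t *tfd, int idx, int ret_status)
{
    pthread_mutex_lock(&tfd->lock);
    tfd->status[idx] = (ret_status >= 0) ? FD_OPENED : FD_NOT_OPENED;
    pthread_mutex_unlock(&tfd->lock);
}
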
static int
-ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open)
+ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
{
int i = 0;
int count = 0;
@@ -56,22 +55,22 @@ ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open)
ec = this->private;
*need_open = 0;
- fd_ctx = ec_fd_get (fd, this);
+ fd_ctx = ec_fd_get(fd, this);
if (!fd_ctx)
return count;
- LOCK (&fd->lock);
+ LOCK(&fd->lock);
{
for (i = 0; i < ec->nodes; i++) {
- if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
- (ec->xl_up & (1<<i))) {
- fd_ctx->fd_status[i] = EC_FD_OPENING;
- *need_open |= (1<<i);
- count++;
- }
+ if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+ (ec->xl_up & (1 << i))) {
+ fd_ctx->fd_status[i] = EC_FD_OPENING;
+ *need_open |= (1 << i);
+ count++;
+ }
}
}
- UNLOCK (&fd->lock);
+ UNLOCK(&fd->lock);
/* If fd needs to open on minimum number of nodes
* then ignore fixing the fd as it has been
@@ -84,136 +83,137 @@ ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open)
}
static gf_boolean_t
-ec_is_fd_fixable (fd_t *fd)
+ec_is_fd_fixable(fd_t *fd)
{
if (!fd || !fd->inode)
return _gf_false;
- else if (fd_is_anonymous (fd))
+ else if (fd_is_anonymous(fd))
return _gf_false;
- else if (gf_uuid_is_null (fd->inode->gfid))
+ else if (gf_uuid_is_null(fd->inode->gfid))
return _gf_false;
return _gf_true;
}
static void
-ec_fix_open (ec_fop_data_t *fop)
+ec_fix_open(ec_fop_data_t *fop)
{
- int call_count = 0;
- uintptr_t need_open = 0;
- int ret = 0;
- loc_t loc = {0, };
+ int call_count = 0;
+ uintptr_t need_open = 0;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
- if (!ec_is_fd_fixable (fop->fd))
+ if (!ec_is_fd_fixable(fop->fd))
goto out;
/* Evaluate how many remote fd's to be opened */
- call_count = ec_fd_ctx_need_open (fop->fd, fop->xl, &need_open);
+ call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open);
if (!call_count)
goto out;
- loc.inode = inode_ref (fop->fd->inode);
- gf_uuid_copy (loc.gfid, fop->fd->inode->gfid);
- ret = loc_path (&loc, NULL);
+ loc.inode = inode_ref(fop->fd->inode);
+ gf_uuid_copy(loc.gfid, fop->fd->inode->gfid);
+ ret = loc_path(&loc, NULL);
if (ret < 0) {
goto out;
}
if (IA_IFDIR == fop->fd->inode->ia_type) {
- ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
- NULL, NULL, &fop->loc[0], fop->fd, NULL);
- } else{
- ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
- NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL);
+ ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
+ &fop->loc[0], fop->fd, NULL);
+ } else {
+ ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
+ &loc, fop->fd->flags, fop->fd, NULL);
}
out:
- loc_wipe (&loc);
+ loc_wipe(&loc);
}
off_t
-ec_range_end_get (off_t fl_start, size_t fl_size)
+ec_range_end_get(off_t fl_start, size_t fl_size)
{
- off_t fl_end = 0;
- switch (fl_size) {
+ off_t fl_end = 0;
+ switch (fl_size) {
case 0:
- return fl_start;
+ return fl_start;
case LLONG_MAX: /*Infinity*/
- return LLONG_MAX;
+ return LLONG_MAX;
default:
- fl_end = fl_start + fl_size - 1;
- if (fl_end < 0) /*over-flow*/
- return LLONG_MAX;
- else
- return fl_end;
- }
+ fl_end = fl_start + fl_size - 1;
+ if (fl_end < 0) /*over-flow*/
+ return LLONG_MAX;
+ else
+ return fl_end;
+ }
}
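
ec_range_end_get, reformatted above, is easy to restate in isolation: it turns (start, size) into an inclusive end offset, with size 0 keeping end == start and LLONG_MAX meaning "to infinity". A hedged standalone sketch (range_end is a made-up name; like the original, it detects signed overflow after the fact, which assumes the platform wraps):

#include <limits.h>
#include <sys/types.h>

static off_t
range_end(off_t start, size_t size)
{
    off_t end;

    if (size == 0)
        return start;
    if (size == LLONG_MAX) /* "infinity" */
        return LLONG_MAX;
    end = start + size - 1;
    return (end < 0) ? LLONG_MAX : end; /* overflow clamps to infinity */
}

/* range_end(100, 10) == 109; range_end(LLONG_MAX - 1, 16) wraps
 * negative and comes back as LLONG_MAX. */
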
static gf_boolean_t
-ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
+ec_is_range_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2)
{
- return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
}
static gf_boolean_t
-ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
+ec_lock_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2)
{
- ec_t *ec = l1->fop->xl->private;
+ ec_t *ec = l1->fop->xl->private;
- /* Fops like access/stat won't have to worry what the other fops are
- * modifying as the fop is wound only to one brick. So it can be
- * executed in parallel*/
- if (l1->fop->minimum == EC_MINIMUM_ONE ||
- l2->fop->minimum == EC_MINIMUM_ONE)
- return _gf_false;
+ /* Fops like access/stat won't have to worry what the other fops are
+ * modifying as the fop is wound only to one brick. So it can be
+ * executed in parallel*/
+ if (l1->fop->minimum == EC_MINIMUM_ONE ||
+ l2->fop->minimum == EC_MINIMUM_ONE)
+ return _gf_false;
- if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
- (l2->fop->flags & EC_FLAG_LOCK_SHARED))
- return _gf_false;
+ if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
+ (l2->fop->flags & EC_FLAG_LOCK_SHARED))
+ return _gf_false;
- if (!ec->parallel_writes) {
- return _gf_true;
- }
+ if (!ec->parallel_writes) {
+ return _gf_true;
+ }
- return ec_is_range_conflict (l1, l2);
+ return ec_is_range_conflict(l1, l2);
}
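
The pair of predicates above decides whether two lock requests may share the lock. Below is a hedged, self-contained restatement; lock_link_t here is a made-up struct holding only the fields the test reads (the real ec_lock_link_t carries much more):

#include <stdbool.h>
#include <sys/types.h>

typedef struct {
    bool single_brick; /* wound to one brick (EC_MINIMUM_ONE) */
    bool shared;       /* holds the lock in shared mode */
    off_t start, end;  /* inclusive byte range covered */
} lock_link_t;

static bool
ranges_overlap(const lock_link_t *a, const lock_link_t *b)
{
    return (a->end >= b->start) && (b->end >= a->start);
}

/* Same decision chain as ec_lock_conflict: single-brick fops and
 * shared/shared pairs never conflict; with parallel writes disabled
 * everything else does; otherwise only overlapping ranges conflict. */
static bool
locks_conflict(const lock_link_t *a, const lock_link_t *b,
               bool parallel_writes)
{
    if (a->single_brick || b->single_brick)
        return false;
    if (a->shared && b->shared)
        return false;
    if (!parallel_writes)
        return true;
    return ranges_overlap(a, b);
}
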
uint32_t
-ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
-{
- if (ec->read_policy == EC_ROUND_ROBIN) {
- return ec->idx;
- } else if (ec->read_policy == EC_GFID_HASH) {
- if (fop->use_fd) {
- return SuperFastHash((char *)fop->fd->inode->gfid,
- sizeof(fop->fd->inode->gfid)) % ec->nodes;
- } else {
- if (gf_uuid_is_null (fop->loc[0].gfid))
- loc_gfid (&fop->loc[0], fop->loc[0].gfid);
- return SuperFastHash((char *)fop->loc[0].gfid,
- sizeof(fop->loc[0].gfid)) % ec->nodes;
- }
+ec_select_first_by_read_policy(ec_t *ec, ec_fop_data_t *fop)
+{
+ if (ec->read_policy == EC_ROUND_ROBIN) {
+ return ec->idx;
+ } else if (ec->read_policy == EC_GFID_HASH) {
+ if (fop->use_fd) {
+ return SuperFastHash((char *)fop->fd->inode->gfid,
+ sizeof(fop->fd->inode->gfid)) %
+ ec->nodes;
+ } else {
+ if (gf_uuid_is_null(fop->loc[0].gfid))
+ loc_gfid(&fop->loc[0], fop->loc[0].gfid);
+ return SuperFastHash((char *)fop->loc[0].gfid,
+ sizeof(fop->loc[0].gfid)) %
+ ec->nodes;
}
- return 0;
+ }
+ return 0;
}
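
For EC_GFID_HASH the starting brick is derived from the file's gfid, so every client reads a given file through the same fragment order and warms the same caches. A sketch of the policy with FNV-1a standing in for SuperFastHash (any stable hash gives the property; the function and names below are illustrative only):

#include <stddef.h>
#include <stdint.h>

static uint32_t
fnv1a(const unsigned char *data, size_t len)
{
    uint32_t h = 2166136261u;

    while (len--) {
        h ^= *data++;
        h *= 16777619u;
    }
    return h;
}

/* Deterministic starting brick from a 16-byte gfid. */
static uint32_t
first_brick_by_gfid(const unsigned char gfid[16], uint32_t nodes)
{
    return fnv1a(gfid, 16) % nodes;
}
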
-static
-gf_boolean_t ec_child_valid(ec_t * ec, ec_fop_data_t * fop, uint32_t idx)
+static gf_boolean_t
+ec_child_valid(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
{
return (idx < ec->nodes) && (((fop->remaining >> idx) & 1) == 1);
}
-static
-uint32_t ec_child_next(ec_t * ec, ec_fop_data_t * fop, uint32_t idx)
+static uint32_t
+ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
{
- while (!ec_child_valid(ec, fop, idx))
- {
- if (++idx >= ec->nodes)
- {
+ while (!ec_child_valid(ec, fop, idx)) {
+ if (++idx >= ec->nodes) {
idx = 0;
}
- if (idx == fop->first)
- {
+ if (idx == fop->first) {
return EC_INVALID_INDEX;
}
}
@@ -221,20 +221,21 @@ uint32_t ec_child_next(ec_t * ec, ec_fop_data_t * fop, uint32_t idx)
return idx;
}
-int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
- int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t * xdata)
+int32_t
+ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
+ uintptr_t bad, dict_t *xdata)
{
if (op_ret < 0) {
- gf_msg (this->name, GF_LOG_DEBUG, op_errno,
- EC_MSG_HEAL_FAIL, "Heal failed");
+ gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
+ "Heal failed");
} else {
if ((mask & ~good) != 0) {
- gf_msg (this->name, GF_LOG_DEBUG, 0,
- EC_MSG_HEAL_SUCCESS, "Heal succeeded on %d/%d "
- "subvolumes",
- gf_bits_count(mask & ~(good | bad)),
- gf_bits_count(mask & ~good));
+ gf_msg(this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_SUCCESS,
+ "Heal succeeded on %d/%d "
+ "subvolumes",
+ gf_bits_count(mask & ~(good | bad)),
+ gf_bits_count(mask & ~good));
}
}
@@ -242,103 +243,101 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
}
static uintptr_t
-ec_fop_needs_name_heal (ec_fop_data_t *fop)
+ec_fop_needs_name_heal(ec_fop_data_t *fop)
{
- ec_t *ec = NULL;
- ec_cbk_data_t *cbk = NULL;
- ec_cbk_data_t *enoent_cbk = NULL;
+ ec_t *ec = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ ec_cbk_data_t *enoent_cbk = NULL;
- ec = fop->xl->private;
- if (fop->id != GF_FOP_LOOKUP)
- return 0;
+ ec = fop->xl->private;
+ if (fop->id != GF_FOP_LOOKUP)
+ return 0;
- if (!fop->loc[0].name || strlen (fop->loc[0].name) == 0)
- return 0;
+ if (!fop->loc[0].name || strlen(fop->loc[0].name) == 0)
+ return 0;
- list_for_each_entry(cbk, &fop->cbk_list, list)
- {
- if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) {
- enoent_cbk = cbk;
- break;
- }
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) {
+ enoent_cbk = cbk;
+ break;
}
+ }
- if (!enoent_cbk)
- return 0;
+ if (!enoent_cbk)
+ return 0;
- return ec->xl_up & ~enoent_cbk->mask;
+ return ec->xl_up & ~enoent_cbk->mask;
}
-int32_t ec_fop_needs_heal(ec_fop_data_t *fop)
+int32_t
+ec_fop_needs_heal(ec_fop_data_t *fop)
{
ec_t *ec = fop->xl->private;
if (fop->lock_count == 0) {
- /*
- * if fop->lock_count is zero that means it saw version mismatch
- * without any locks so it can't be trusted. If we launch a heal
- * based on this it will lead to INODELKs which will affect I/O
- * performance. Considering self-heal-daemon and operations on
- * the inode from client which take locks can still trigger the
- * heal we can choose to not attempt a heal when fop->lock_count
- * is zero.
- */
- return 0;
+ /*
+ * if fop->lock_count is zero that means it saw version mismatch
+ * without any locks so it can't be trusted. If we launch a heal
+ * based on this it will lead to INODELKs which will affect I/O
+ * performance. Considering self-heal-daemon and operations on
+ * the inode from client which take locks can still trigger the
+ * heal we can choose to not attempt a heal when fop->lock_count
+ * is zero.
+ */
+ return 0;
}
return (ec->xl_up & ~(fop->remaining | fop->good)) != 0;
}
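
The return expression above packs the heal decision into one bitmask line. A self-contained restatement (needs_heal is a made-up name), plus a worked case:

#include <stdbool.h>
#include <stdint.h>

/* One bit per brick. A heal is needed when some brick is up (xl_up)
 * but neither still pending (remaining) nor known good (good): it
 * answered, and answered badly. */
static bool
needs_heal(uintptr_t xl_up, uintptr_t remaining, uintptr_t good)
{
    return (xl_up & ~(remaining | good)) != 0;
}

/* xl_up = 0b111, remaining = 0b001, good = 0b010: brick 2 answered
 * badly, so needs_heal() returns true. */
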
-void ec_check_status(ec_fop_data_t * fop)
+void
+ec_check_status(ec_fop_data_t *fop)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
int32_t partial = 0;
char str1[32], str2[32], str3[32], str4[32], str5[32];
- if (!ec_fop_needs_name_heal (fop) && !ec_fop_needs_heal(fop)) {
+ if (!ec_fop_needs_name_heal(fop) && !ec_fop_needs_heal(fop)) {
return;
}
if (fop->answer && fop->answer->op_ret >= 0) {
- if ((fop->id == GF_FOP_LOOKUP) ||
- (fop->id == GF_FOP_STAT) || (fop->id == GF_FOP_FSTAT)) {
+ if ((fop->id == GF_FOP_LOOKUP) || (fop->id == GF_FOP_STAT) ||
+ (fop->id == GF_FOP_FSTAT)) {
partial = fop->answer->iatt[0].ia_type == IA_IFDIR;
} else if (fop->id == GF_FOP_OPENDIR) {
partial = 1;
}
}
- gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
- EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5),
- ec->xl_up & ~(fop->remaining | fop->good), ec->nodes));
- if (fop->use_fd)
- {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes));
+ if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
fop->fd, partial, NULL);
}
- }
- else
- {
+ } else {
ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
&fop->loc[0], partial, NULL);
- if (fop->loc[1].inode != NULL)
- {
+ if (fop->loc[1].inode != NULL) {
ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
&fop->loc[1], partial, NULL);
}
}
}
-void ec_update_good(ec_fop_data_t *fop, uintptr_t good)
+void
+ec_update_good(ec_fop_data_t *fop, uintptr_t good)
{
fop->good = good;
@@ -349,7 +348,8 @@ void ec_update_good(ec_fop_data_t *fop, uintptr_t good)
}
}
-void ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop)
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop)
{
/* Fops that are executed only on one brick do not have enough information
* to update the global mask of good bricks. */
@@ -365,15 +365,16 @@ void ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop)
lock->good_mask &= fop->good | fop->remaining;
}
-void __ec_fop_set_error(ec_fop_data_t * fop, int32_t error)
+void
+__ec_fop_set_error(ec_fop_data_t *fop, int32_t error)
{
- if ((error != 0) && (fop->error == 0))
- {
+ if ((error != 0) && (fop->error == 0)) {
fop->error = error;
}
}
-void ec_fop_set_error(ec_fop_data_t * fop, int32_t error)
+void
+ec_fop_set_error(ec_fop_data_t *fop, int32_t error)
{
LOCK(&fop->lock);
@@ -425,18 +426,20 @@ ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro)
return cbk;
}
-void ec_sleep(ec_fop_data_t *fop)
+void
+ec_sleep(ec_fop_data_t *fop)
{
LOCK(&fop->lock);
- GF_ASSERT (fop->refs > 0);
+ GF_ASSERT(fop->refs > 0);
fop->refs++;
fop->jobs++;
UNLOCK(&fop->lock);
}
-int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
+int32_t
+ec_check_complete(ec_fop_data_t *fop, ec_resume_f resume)
{
int32_t error = -1;
@@ -444,14 +447,11 @@ int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
GF_ASSERT(fop->resume == NULL);
- if (--fop->jobs != 0)
- {
+ if (--fop->jobs != 0) {
ec_trace("WAIT", fop, "resume=%p", resume);
fop->resume = resume;
- }
- else
- {
+ } else {
error = fop->error;
fop->error = 0;
}
@@ -461,7 +461,8 @@ int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume)
return error;
}
-void ec_resume(ec_fop_data_t * fop, int32_t error)
+void
+ec_resume(ec_fop_data_t *fop, int32_t error)
{
ec_resume_f resume = NULL;
@@ -469,16 +470,13 @@ void ec_resume(ec_fop_data_t * fop, int32_t error)
__ec_fop_set_error(fop, error);
- if (--fop->jobs == 0)
- {
+ if (--fop->jobs == 0) {
resume = fop->resume;
fop->resume = NULL;
- if (resume != NULL)
- {
+ if (resume != NULL) {
ec_trace("RESUME", fop, "error=%d", error);
- if (fop->error != 0)
- {
+ if (fop->error != 0) {
error = fop->error;
}
fop->error = 0;
@@ -487,21 +485,20 @@ void ec_resume(ec_fop_data_t * fop, int32_t error)
UNLOCK(&fop->lock);
- if (resume != NULL)
- {
+ if (resume != NULL) {
resume(fop, error);
}
ec_fop_data_release(fop);
}
-void ec_resume_parent(ec_fop_data_t * fop, int32_t error)
+void
+ec_resume_parent(ec_fop_data_t *fop, int32_t error)
{
- ec_fop_data_t * parent;
+ ec_fop_data_t *parent;
parent = fop->parent;
- if (parent != NULL)
- {
+ if (parent != NULL) {
ec_trace("RESUME_PARENT", fop, "error=%u", error);
fop->parent = NULL;
ec_resume(parent, error);
@@ -509,22 +506,23 @@ void ec_resume_parent(ec_fop_data_t * fop, int32_t error)
}
gf_boolean_t
-ec_is_recoverable_error (int32_t op_errno)
+ec_is_recoverable_error(int32_t op_errno)
{
- switch (op_errno) {
+ switch (op_errno) {
case ENOTCONN:
case ESTALE:
case ENOENT:
- case EBADFD:/*Opened fd but brick is disconnected*/
- case EIO:/*Backend-fs crash like XFS/ext4 etc*/
- return _gf_true;
- }
- return _gf_false;
+ case EBADFD: /*Opened fd but brick is disconnected*/
+ case EIO: /*Backend-fs crash like XFS/ext4 etc*/
+ return _gf_true;
+ }
+ return _gf_false;
}
-void ec_complete(ec_fop_data_t * fop)
+void
+ec_complete(ec_fop_data_t *fop)
{
- ec_cbk_data_t * cbk = NULL;
+ ec_cbk_data_t *cbk = NULL;
int32_t resume = 0, update = 0;
int healing_count = 0;
@@ -536,9 +534,9 @@ void ec_complete(ec_fop_data_t * fop)
if (fop->answer == NULL) {
if (!list_empty(&fop->cbk_list)) {
cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
- healing_count = gf_bits_count (cbk->mask & fop->healing);
- /* fop shouldn't be treated as success if it is not
- * successful on at least fop->minimum good copies*/
+ healing_count = gf_bits_count(cbk->mask & fop->healing);
+ /* fop shouldn't be treated as success if it is not
+ * successful on at least fop->minimum good copies*/
if ((cbk->count - healing_count) >= fop->minimum) {
fop->answer = cbk;
@@ -560,8 +558,7 @@ void ec_complete(ec_fop_data_t * fop)
ec_update_good(fop, cbk->mask);
}
- if (resume)
- {
+ if (resume) {
ec_resume(fop, 0);
}
@@ -571,40 +568,39 @@ void ec_complete(ec_fop_data_t * fop)
/* There could be already granted locks sitting on the bricks, unlock for which
* must be wound at all costs*/
static gf_boolean_t
-ec_must_wind (ec_fop_data_t *fop)
-{
- if ((fop->id == GF_FOP_INODELK) || (fop->id == GF_FOP_FINODELK) ||
- (fop->id == GF_FOP_LK)) {
- if (fop->flock.l_type == F_UNLCK)
- return _gf_true;
- } else if ((fop->id == GF_FOP_ENTRYLK) ||
- (fop->id == GF_FOP_FENTRYLK)) {
- if (fop->entrylk_cmd == ENTRYLK_UNLOCK)
- return _gf_true;
- }
+ec_must_wind(ec_fop_data_t *fop)
+{
+ if ((fop->id == GF_FOP_INODELK) || (fop->id == GF_FOP_FINODELK) ||
+ (fop->id == GF_FOP_LK)) {
+ if (fop->flock.l_type == F_UNLCK)
+ return _gf_true;
+ } else if ((fop->id == GF_FOP_ENTRYLK) || (fop->id == GF_FOP_FENTRYLK)) {
+ if (fop->entrylk_cmd == ENTRYLK_UNLOCK)
+ return _gf_true;
+ }
- return _gf_false;
+ return _gf_false;
}
static gf_boolean_t
-ec_internal_op (ec_fop_data_t *fop)
-{
- if (ec_must_wind (fop))
- return _gf_true;
- if (fop->id == GF_FOP_XATTROP)
- return _gf_true;
- if (fop->id == GF_FOP_FXATTROP)
- return _gf_true;
- return _gf_false;
+ec_internal_op(ec_fop_data_t *fop)
+{
+ if (ec_must_wind(fop))
+ return _gf_true;
+ if (fop->id == GF_FOP_XATTROP)
+ return _gf_true;
+ if (fop->id == GF_FOP_FXATTROP)
+ return _gf_true;
+ return _gf_false;
}
char *
-ec_msg_str (ec_fop_data_t *fop)
+ec_msg_str(ec_fop_data_t *fop)
{
- loc_t *loc1 = NULL;
- loc_t *loc2 = NULL;
- char gfid1[64] = {0};
- char gfid2[64] = {0};
+ loc_t *loc1 = NULL;
+ loc_t *loc2 = NULL;
+ char gfid1[64] = {0};
+ char gfid2[64] = {0};
if (fop->errstr)
return fop->errstr;
@@ -614,29 +610,29 @@ ec_msg_str (ec_fop_data_t *fop)
loc2 = &fop->loc[1];
if (fop->id == GF_FOP_RENAME) {
- gf_asprintf(&fop->errstr,
- "FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively", ec_fop_name (fop->id),
- loc1->path, loc2->path,
- uuid_utoa_r (loc1->gfid, gfid1),
- uuid_utoa_r (loc2->gfid, gfid2));
+ gf_asprintf(&fop->errstr,
+ "FOP : '%s' failed on '%s' and '%s' with gfids "
+ "%s and %s respectively",
+ ec_fop_name(fop->id), loc1->path, loc2->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ uuid_utoa_r(loc2->gfid, gfid2));
} else {
- gf_asprintf(&fop->errstr,
- "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name (fop->id),
- loc1->path, uuid_utoa_r (loc1->gfid, gfid1));
+ gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1));
}
} else {
gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name (fop->id),
- uuid_utoa_r (fop->fd->inode->gfid, gfid1));
+ ec_fop_name(fop->id),
+ uuid_utoa_r(fop->fd->inode->gfid, gfid1));
}
return fop->errstr;
}
-int32_t ec_child_select(ec_fop_data_t * fop)
+int32_t
+ec_child_select(ec_fop_data_t *fop)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
int32_t first = 0, num = 0;
ec_fop_cleanup(fop);
@@ -645,26 +641,22 @@ int32_t ec_child_select(ec_fop_data_t * fop)
/* Wind the fop on same subvols as parent for any internal extra fops like
* head/tail read in case of writev fop. Unlocks shouldn't do this because
* unlock should go on all subvols where lock is performed*/
- if (fop->parent && !ec_internal_op (fop)) {
- fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ if (fop->parent && !ec_internal_op(fop)) {
+ fop->mask &= (fop->parent->mask & ~fop->parent->healing);
}
- if ((fop->mask & ~ec->xl_up) != 0)
- {
- gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
- EC_MSG_OP_EXEC_UNAVAIL,
- "Executing operation with "
- "some subvolumes unavailable. (%lX). %s ",
- fop->mask & ~ec->xl_up, ec_msg_str(fop));
+ if ((fop->mask & ~ec->xl_up) != 0) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_EXEC_UNAVAIL,
+ "Executing operation with "
+ "some subvolumes unavailable. (%lX). %s ",
+ fop->mask & ~ec->xl_up, ec_msg_str(fop));
fop->mask &= ec->xl_up;
}
- switch (fop->minimum)
- {
+ switch (fop->minimum) {
case EC_MINIMUM_ALL:
fop->minimum = gf_bits_count(fop->mask);
- if (fop->minimum >= ec->fragments)
- {
+ if (fop->minimum >= ec->fragments) {
break;
}
case EC_MINIMUM_MIN:
@@ -675,11 +667,11 @@ int32_t ec_child_select(ec_fop_data_t * fop)
}
if (ec->read_policy == EC_ROUND_ROBIN) {
- first = ec->idx;
- if (++first >= ec->nodes) {
- first = 0;
- }
- ec->idx = first;
+ first = ec->idx;
+ if (++first >= ec->nodes) {
+ first = 0;
+ }
+ ec->idx = first;
}
num = gf_bits_count(fop->mask);
@@ -690,14 +682,12 @@ int32_t ec_child_select(ec_fop_data_t * fop)
ec_trace("SELECT", fop, "");
- if ((num < fop->minimum) && (num < ec->fragments))
- {
- gf_msg (ec->xl->name, GF_LOG_ERROR, 0,
- EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ if ((num < fop->minimum) && (num < ec->fragments)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children "
+ "for this request (have %d, need "
+ "%d). %s",
+ num, fop->minimum, ec_msg_str(fop));
return 0;
}
@@ -706,10 +696,11 @@ int32_t ec_child_select(ec_fop_data_t * fop)
return 1;
}
-void ec_dispatch_next(ec_fop_data_t * fop, uint32_t idx)
+void
+ec_dispatch_next(ec_fop_data_t *fop, uint32_t idx)
{
uint32_t i = EC_INVALID_INDEX;
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
LOCK(&fop->lock);
@@ -727,15 +718,15 @@ void ec_dispatch_next(ec_fop_data_t * fop, uint32_t idx)
UNLOCK(&fop->lock);
- if (i < EC_MAX_NODES)
- {
+ if (i < EC_MAX_NODES) {
fop->wind(ec, fop, idx);
}
}
-void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
+void
+ec_dispatch_mask(ec_fop_data_t *fop, uintptr_t mask)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
int32_t count, idx;
count = gf_bits_count(mask);
@@ -752,10 +743,8 @@ void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
UNLOCK(&fop->lock);
idx = 0;
- while (mask != 0)
- {
- if ((mask & 1) != 0)
- {
+ while (mask != 0) {
+ if ((mask & 1) != 0) {
fop->wind(ec, fop, idx);
}
idx++;
@@ -763,27 +752,27 @@ void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
}
}
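
The reformatted loop in ec_dispatch_mask is the usual scan over a brick bitmask. Isolated, with a placeholder callback instead of fop->wind:

#include <stdint.h>

/* Invoke wind(idx) once per set bit, lowest brick first, exactly as
 * the dispatch loop above winds the fop per brick. */
static void
for_each_set_bit(uintptr_t mask, void (*wind)(int idx))
{
    int idx = 0;

    while (mask != 0) {
        if ((mask & 1) != 0)
            wind(idx);
        idx++;
        mask >>= 1;
    }
}
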
-void ec_dispatch_start(ec_fop_data_t * fop)
+void
+ec_dispatch_start(ec_fop_data_t *fop)
{
fop->answer = NULL;
fop->good = 0;
INIT_LIST_HEAD(&fop->cbk_list);
- if (fop->lock_count > 0)
- {
+ if (fop->lock_count > 0) {
ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner);
}
}
-void ec_dispatch_one(ec_fop_data_t * fop)
+void
+ec_dispatch_one(ec_fop_data_t *fop)
{
ec_dispatch_start(fop);
- if (ec_child_select(fop))
- {
+ if (ec_child_select(fop)) {
fop->expected = 1;
- fop->first = ec_select_first_by_read_policy (fop->xl->private, fop);
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
ec_dispatch_next(fop, fop->first);
}
@@ -799,8 +788,8 @@ ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk)
*cbk = tmp;
}
if ((tmp != NULL) && (tmp->op_ret < 0) &&
- ec_is_recoverable_error (tmp->op_errno)) {
- GF_ASSERT (fop->mask & (1ULL << tmp->idx));
+ ec_is_recoverable_error(tmp->op_errno)) {
+ GF_ASSERT(fop->mask & (1ULL << tmp->idx));
fop->mask ^= (1ULL << tmp->idx);
if (fop->mask) {
return _gf_true;
@@ -810,12 +799,12 @@ ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk)
return _gf_false;
}
-void ec_dispatch_inc(ec_fop_data_t * fop)
+void
+ec_dispatch_inc(ec_fop_data_t *fop)
{
ec_dispatch_start(fop);
- if (ec_child_select(fop))
- {
+ if (ec_child_select(fop)) {
fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
@@ -824,35 +813,34 @@ void ec_dispatch_inc(ec_fop_data_t * fop)
}
void
-ec_dispatch_all (ec_fop_data_t *fop)
+ec_dispatch_all(ec_fop_data_t *fop)
{
- ec_dispatch_start(fop);
+ ec_dispatch_start(fop);
- if (ec_child_select(fop)) {
- fop->expected = gf_bits_count(fop->remaining);
- fop->first = 0;
+ if (ec_child_select(fop)) {
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
- ec_dispatch_mask(fop, fop->remaining);
- }
+ ec_dispatch_mask(fop, fop->remaining);
+ }
}
-void ec_dispatch_min(ec_fop_data_t * fop)
+void
+ec_dispatch_min(ec_fop_data_t *fop)
{
- ec_t * ec = fop->xl->private;
+ ec_t *ec = fop->xl->private;
uintptr_t mask;
uint32_t idx;
int32_t count;
ec_dispatch_start(fop);
- if (ec_child_select(fop))
- {
+ if (ec_child_select(fop)) {
fop->expected = count = ec->fragments;
- fop->first = ec_select_first_by_read_policy (fop->xl->private, fop);
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
idx = fop->first - 1;
mask = 0;
- while (count-- > 0)
- {
+ while (count-- > 0) {
idx = ec_child_next(ec, fop, idx + 1);
if (idx < EC_MAX_NODES)
mask |= 1ULL << idx;
@@ -862,19 +850,18 @@ void ec_dispatch_min(ec_fop_data_t * fop)
}
}
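
ec_dispatch_min combines ec_select_first_by_read_policy and ec_child_next to build a mask of exactly ec->fragments bricks. A condensed, hypothetical version of that selection (pick_fragments is not a real function in this file):

#include <stdint.h>

/* Walk the 'remaining' bitmask circularly from 'first' and collect up
 * to 'fragments' available bricks into a dispatch mask. */
static uintptr_t
pick_fragments(uintptr_t remaining, uint32_t nodes, uint32_t first,
               uint32_t fragments)
{
    uintptr_t mask = 0;
    uint32_t idx = first;
    uint32_t count;

    for (count = 0; count < fragments; count++) {
        while (((remaining >> idx) & 1) == 0) {
            if (++idx >= nodes)
                idx = 0;
            if (idx == first)
                return mask; /* fewer than 'fragments' were available */
        }
        mask |= (uintptr_t)1 << idx;
        if (++idx >= nodes)
            idx = 0;
    }
    return mask;
}
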
-ec_lock_t *ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
+ec_lock_t *
+ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
{
ec_t *ec = fop->xl->private;
- ec_lock_t * lock;
+ ec_lock_t *lock;
int32_t err;
if ((loc->inode == NULL) ||
- (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)))
- {
- gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
- EC_MSG_INVALID_INODE,
- "Trying to lock based on an invalid "
- "inode");
+ (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid))) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_INODE,
+ "Trying to lock based on an invalid "
+ "inode");
__ec_fop_set_error(fop, EINVAL);
@@ -882,8 +869,7 @@ ec_lock_t *ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
}
lock = mem_get0(ec->lock_pool);
- if (lock != NULL)
- {
+ if (lock != NULL) {
lock->good_mask = -1ULL;
INIT_LIST_HEAD(&lock->owners);
INIT_LIST_HEAD(&lock->waiting);
@@ -900,7 +886,8 @@ ec_lock_t *ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
return lock;
}
-void ec_lock_destroy(ec_lock_t * lock)
+void
+ec_lock_destroy(ec_lock_t *lock)
{
loc_wipe(&lock->loc);
if (lock->fd != NULL) {
@@ -910,13 +897,15 @@ void ec_lock_destroy(ec_lock_t * lock)
mem_put(lock);
}
-int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2)
+int32_t
+ec_lock_compare(ec_lock_t *lock1, ec_lock_t *lock2)
{
return gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
}
-void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
- loc_t *base, off_t fl_start, size_t fl_size)
+void
+ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags, loc_t *base,
+ off_t fl_start, size_t fl_size)
{
ec_lock_link_t *link;
@@ -951,14 +940,14 @@ void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
link->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0;
link->base = base;
link->fl_start = fl_start;
- link->fl_end = ec_range_end_get (fl_start, fl_size);
+ link->fl_end = ec_range_end_get(fl_start, fl_size);
lock->refs_pending++;
}
-void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
- uint32_t flags, loc_t *base,
- off_t fl_start, size_t fl_size)
+void
+ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ loc_t *base, off_t fl_start, size_t fl_size)
{
ec_lock_t *lock = NULL;
ec_inode_t *ctx;
@@ -987,8 +976,8 @@ void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
if ((fop->lock_count > 0) && (fop->locks[0].lock == lock)) {
/* Combine data/meta updates */
fop->locks[0].update[EC_DATA_TXN] |= (flags & EC_UPDATE_DATA) != 0;
- fop->locks[0].update[EC_METADATA_TXN] |=
- (flags & EC_UPDATE_META) != 0;
+ fop->locks[0].update[EC_METADATA_TXN] |= (flags & EC_UPDATE_META) !=
+ 0;
/* Only one base inode is allowed per fop, so there shouldn't be
* overwrites here. */
@@ -999,8 +988,10 @@ void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
goto update_query;
}
- ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already "
- "acquired", lock, loc->inode);
+ ec_trace("LOCK_INODELK", fop,
+ "lock=%p, inode=%p. Lock already "
+ "acquired",
+ lock, loc->inode);
goto insert;
}
@@ -1026,14 +1017,16 @@ unlock:
UNLOCK(&loc->inode->lock);
}
-void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
- off_t fl_start, size_t fl_size)
+void
+ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, size_t fl_size)
{
ec_lock_prepare_inode_internal(fop, loc, flags, NULL, fl_start, fl_size);
}
-void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
- uint32_t flags)
+void
+ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
+ uint32_t flags)
{
loc_t tmp;
int32_t err;
@@ -1052,7 +1045,7 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
if ((flags & EC_INODE_SIZE) != 0) {
flags ^= EC_INODE_SIZE;
} else {
- base = NULL;
+ base = NULL;
}
ec_lock_prepare_inode_internal(fop, &tmp, flags, base, 0, LLONG_MAX);
@@ -1060,8 +1053,9 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
loc_wipe(&tmp);
}
-void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags,
- off_t fl_start, size_t fl_size)
+void
+ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, off_t fl_start,
+ size_t fl_size)
{
loc_t loc;
int32_t err;
@@ -1083,15 +1077,14 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags,
}
gf_boolean_t
-ec_config_check (xlator_t *xl, ec_config_t *config)
+ec_config_check(xlator_t *xl, ec_config_t *config)
{
ec_t *ec;
ec = xl->private;
if ((config->version != EC_CONFIG_VERSION) ||
(config->algorithm != EC_CONFIG_ALGORITHM) ||
- (config->gf_word_size != EC_GF_BITS) ||
- (config->bricks != ec->nodes) ||
+ (config->gf_word_size != EC_GF_BITS) || (config->bricks != ec->nodes) ||
(config->redundancy != ec->redundancy) ||
(config->chunk_size != EC_METHOD_CHUNK_SIZE)) {
uint32_t data_bricks;
@@ -1110,20 +1103,17 @@ ec_config_check (xlator_t *xl, ec_config_t *config)
if ((config->redundancy < 1) ||
(config->redundancy * 2 >= config->bricks) ||
!ec_is_power_of_2(config->gf_word_size) ||
- ((config->chunk_size * 8) % (config->gf_word_size * data_bricks)
- != 0)) {
- gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
- EC_MSG_INVALID_CONFIG,
- "Invalid or corrupted config");
+ ((config->chunk_size * 8) % (config->gf_word_size * data_bricks) !=
+ 0)) {
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG,
+ "Invalid or corrupted config");
} else {
- gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
- EC_MSG_INVALID_CONFIG,
- "Unsupported config "
- "(V=%u, A=%u, W=%u, "
- "N=%u, R=%u, S=%u)",
- config->version, config->algorithm,
- config->gf_word_size, config->bricks,
- config->redundancy, config->chunk_size);
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG,
+ "Unsupported config "
+ "(V=%u, A=%u, W=%u, "
+ "N=%u, R=%u, S=%u)",
+ config->version, config->algorithm, config->gf_word_size,
+ config->bricks, config->redundancy, config->chunk_size);
}
return _gf_false;
@@ -1133,20 +1123,18 @@ ec_config_check (xlator_t *xl, ec_config_t *config)
}
gf_boolean_t
-ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx,
- uint64_t *dirty)
+ec_set_dirty_flag(ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty)
{
-
gf_boolean_t set_dirty = _gf_false;
if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) {
- if (!link->optimistic_changelog)
- dirty[EC_DATA_TXN] = 1;
+ if (!link->optimistic_changelog)
+ dirty[EC_DATA_TXN] = 1;
}
if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) {
- if (!link->optimistic_changelog)
- dirty[EC_METADATA_TXN] = 1;
+ if (!link->optimistic_changelog)
+ dirty[EC_METADATA_TXN] = 1;
}
if (dirty[EC_METADATA_TXN] || dirty[EC_DATA_TXN]) {
@@ -1157,9 +1145,9 @@ ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx,
}
int32_t
-ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+ec_prepare_update_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
struct list_head list;
ec_fop_data_t *fop = cookie, *parent, *tmp;
@@ -1179,95 +1167,87 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
LOCK(&lock->loc.inode->lock);
- list_for_each_entry(link, &lock->owners, owner_list) {
+ list_for_each_entry(link, &lock->owners, owner_list)
+ {
if ((link->waiting_flags & provided_flags) != 0) {
link->waiting_flags ^= (link->waiting_flags & provided_flags);
if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
- list_add_tail(&link->fop->cbk_list, &list);
+ list_add_tail(&link->fop->cbk_list, &list);
}
}
if (op_ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, op_errno,
- EC_MSG_SIZE_VERS_GET_FAIL,
- "Failed to get size and version : %s",
- ec_msg_str(fop));
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_SIZE_VERS_GET_FAIL,
+ "Failed to get size and version : %s", ec_msg_str(fop));
goto unlock;
}
if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
- op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
- ctx->pre_version,
- EC_VERSION_SIZE);
+ op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, ctx->pre_version,
+ EC_VERSION_SIZE);
+ if (op_errno != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_VER_XATTR_GET_FAIL, "Unable to get version xattr. %s",
+ ec_msg_str(fop));
+ goto unlock;
+ }
+ ctx->post_version[0] += ctx->pre_version[0];
+ ctx->post_version[1] += ctx->pre_version[1];
+
+ ctx->have_version = _gf_true;
+
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, &ctx->pre_size);
if (op_errno != 0) {
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
- EC_MSG_VER_XATTR_GET_FAIL,
- "Unable to get version xattr. %s",
- ec_msg_str(fop));
- goto unlock;
+ if (lock->loc.inode->ia_type == IA_IFREG) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_SIZE_XATTR_GET_FAIL,
+ "Unable to get size xattr. %s", ec_msg_str(fop));
+ goto unlock;
+ }
+ } else {
+ ctx->post_size = ctx->pre_size;
+
+ ctx->have_size = _gf_true;
}
- ctx->post_version[0] += ctx->pre_version[0];
- ctx->post_version[1] += ctx->pre_version[1];
- ctx->have_version = _gf_true;
+ op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, &ctx->config);
+ if (op_errno != 0) {
+ if ((lock->loc.inode->ia_type == IA_IFREG) ||
+ (op_errno != ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_CONFIG_XATTR_GET_FAIL,
+ "Unable to get config xattr. %s", ec_msg_str(fop));
- if (lock->loc.inode->ia_type == IA_IFREG ||
- lock->loc.inode->ia_type == IA_INVAL) {
- op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE,
- &ctx->pre_size);
- if (op_errno != 0) {
- if (lock->loc.inode->ia_type == IA_IFREG) {
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
- EC_MSG_SIZE_XATTR_GET_FAIL,
- "Unable to get size xattr. %s",
- ec_msg_str(fop));
- goto unlock;
- }
- } else {
- ctx->post_size = ctx->pre_size;
-
- ctx->have_size = _gf_true;
+ goto unlock;
}
+ } else {
+ if (!ec_config_check(parent->xl, &ctx->config)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_CONFIG_XATTR_INVALID, "Invalid config xattr");
- op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG,
- &ctx->config);
- if (op_errno != 0) {
- if ((lock->loc.inode->ia_type == IA_IFREG) ||
- (op_errno != ENODATA)) {
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
- EC_MSG_CONFIG_XATTR_GET_FAIL,
- "Unable to get config xattr. %s",
- ec_msg_str(fop));
-
- goto unlock;
- }
- } else {
- if (!ec_config_check(parent->xl, &ctx->config)) {
- gf_msg (this->name, GF_LOG_ERROR, EINVAL,
- EC_MSG_CONFIG_XATTR_INVALID,
- "Invalid config xattr");
-
- op_errno = EINVAL;
-
- goto unlock;
- }
- ctx->have_config = _gf_true;
+ op_errno = EINVAL;
+
+ goto unlock;
}
+ ctx->have_config = _gf_true;
}
- ctx->have_info = _gf_true;
+ }
+ ctx->have_info = _gf_true;
}
- ec_set_dirty_flag (fop->data, ctx, dirty);
+ ec_set_dirty_flag(fop->data, ctx, dirty);
if (dirty[EC_METADATA_TXN] &&
(EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
- GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
- ctx->dirty[EC_METADATA_TXN] = 1;
+ GF_ASSERT(!ctx->dirty[EC_METADATA_TXN]);
+ ctx->dirty[EC_METADATA_TXN] = 1;
}
if (dirty[EC_DATA_TXN] &&
(EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
- GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
- ctx->dirty[EC_DATA_TXN] = 1;
+ GF_ASSERT(!ctx->dirty[EC_DATA_TXN]);
+ ctx->dirty[EC_DATA_TXN] = 1;
}
op_errno = 0;
unlock:
@@ -1279,20 +1259,20 @@ unlock:
* it dirty and update versions right away if dirty was not set before.
*/
if (lock->good_mask & ~(fop->good | fop->remaining)) {
- release = _gf_true;
+ release = _gf_true;
}
if (parent_link->update[0] && !parent_link->dirty[0]) {
- lock->release |= release;
+ lock->release |= release;
}
if (parent_link->update[1] && !parent_link->dirty[1]) {
- lock->release |= release;
+ lock->release |= release;
}
/* We don't allow the main fop to be executed on bricks that have not
* succeeded the initial xattrop. */
- ec_lock_update_good (lock, fop);
+ ec_lock_update_good(lock, fop);
/*As of now only data healing marks bricks as healing*/
lock->healing |= fop->healing;
@@ -1308,7 +1288,7 @@ unlock:
tmp->mask &= fop->good;
/*As of now only data healing marks bricks as healing*/
- if (ec_is_data_fop (tmp->id)) {
+ if (ec_is_data_fop(tmp->id)) {
tmp->healing |= fop->healing;
}
}
@@ -1322,52 +1302,53 @@ unlock:
static gf_boolean_t
ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
{
- uint64_t current;
+ uint64_t current;
- link->waiting_flags |= EC_FLAG_NEEDS(flag);
+ link->waiting_flags |= EC_FLAG_NEEDS(flag);
- current = EC_NEEDED_FLAGS(lock->waiting_flags);
- if (!EC_FLAGS_HAVE(current, flag)) {
- lock->waiting_flags |= EC_FLAG_NEEDS(flag);
- link->waiting_flags |= EC_FLAG_PROVIDES(flag);
+ current = EC_NEEDED_FLAGS(lock->waiting_flags);
+ if (!EC_FLAGS_HAVE(current, flag)) {
+ lock->waiting_flags |= EC_FLAG_NEEDS(flag);
+ link->waiting_flags |= EC_FLAG_PROVIDES(flag);
- return _gf_true;
- }
+ return _gf_true;
+ }
- return _gf_false;
+ return _gf_false;
}
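
ec_set_needed_flag is what lets many fops waiting on one lock share a single xattrop: only the first link to need a flag becomes its provider, later links merely wait for it to be cleared. A hypothetical reconstruction of the bookkeeping; the real EC_FLAG_NEEDS/EC_FLAG_PROVIDES encoding lives in the ec headers and may differ from the toy split below:

#include <stdbool.h>
#include <stdint.h>

/* Toy encoding: low 32 bits record what is needed, high 32 bits what
 * this link will provide. */
#define NEEDS(f) ((uint64_t)1 << (f))
#define PROVIDES(f) ((uint64_t)1 << ((f) + 32))

static bool
set_needed_flag(uint64_t *lock_flags, uint64_t *link_flags, unsigned flag)
{
    *link_flags |= NEEDS(flag);
    if ((*lock_flags & NEEDS(flag)) == 0) {
        *lock_flags |= NEEDS(flag);
        *link_flags |= PROVIDES(flag);
        return true; /* this link will issue the xattrop for 'flag' */
    }
    return false;
}
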
static uint64_t
-ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
- uint64_t *dirty)
+ec_set_xattrop_flags_and_params(ec_lock_t *lock, ec_lock_link_t *link,
+ uint64_t *dirty)
{
- uint64_t oldflags = 0;
- uint64_t newflags = 0;
- ec_inode_t *ctx = lock->ctx;
+ uint64_t oldflags = 0;
+ uint64_t newflags = 0;
+ ec_inode_t *ctx = lock->ctx;
- oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
+ oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
- if (lock->query && !ctx->have_info) {
- ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
- }
+ if (lock->query && !ctx->have_info) {
+ ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
+ }
- if (dirty[EC_DATA_TXN]) {
- if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
- dirty[EC_DATA_TXN] = 0;
- }
+ if (dirty[EC_DATA_TXN]) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
+ dirty[EC_DATA_TXN] = 0;
}
+ }
- if (dirty[EC_METADATA_TXN]) {
- if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
- dirty[EC_METADATA_TXN] = 0;
- }
+ if (dirty[EC_METADATA_TXN]) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
+ dirty[EC_METADATA_TXN] = 0;
}
- newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
+ }
+ newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
- return oldflags ^ newflags;
+ return oldflags ^ newflags;
}
-void ec_get_size_version(ec_lock_link_t *link)
+void
+ec_get_size_version(ec_lock_link_t *link)
{
loc_t loc;
ec_lock_t *lock;
@@ -1375,7 +1356,7 @@ void ec_get_size_version(ec_lock_link_t *link)
ec_fop_data_t *fop;
dict_t *dict = NULL;
dict_t *xdata = NULL;
- ec_t *ec = NULL;
+ ec_t *ec = NULL;
int32_t error = 0;
gf_boolean_t set_dirty = _gf_false;
uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
@@ -1383,18 +1364,18 @@ void ec_get_size_version(ec_lock_link_t *link)
lock = link->lock;
ctx = lock->ctx;
fop = link->fop;
- ec = fop->xl->private;
+ ec = fop->xl->private;
uint64_t changed_flags = 0;
- if (ec->optimistic_changelog &&
- !(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id))
- link->optimistic_changelog = _gf_true;
+ if (ec->optimistic_changelog && !(ec->node_mask & ~link->lock->good_mask) &&
+ !ec_is_data_fop(fop->id))
+ link->optimistic_changelog = _gf_true;
- set_dirty = ec_set_dirty_flag (link, ctx, dirty);
+ set_dirty = ec_set_dirty_flag(link, ctx, dirty);
/* If ec metadata has already been retrieved, do not try again. */
if (ctx->have_info && (!set_dirty)) {
- if (ec_is_data_fop (fop->id)) {
+ if (ec_is_data_fop(fop->id)) {
fop->healing |= lock->healing;
}
return;
@@ -1402,24 +1383,23 @@ void ec_get_size_version(ec_lock_link_t *link)
/* Determine if there's something we need to retrieve for the current
* operation. */
- if (!set_dirty && !lock->query &&
- (lock->loc.inode->ia_type != IA_IFREG) &&
+ if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
(lock->loc.inode->ia_type != IA_INVAL)) {
- return;
+ return;
}
memset(&loc, 0, sizeof(loc));
LOCK(&lock->loc.inode->lock);
- changed_flags = ec_set_xattrop_flags_and_params (lock, link, dirty);
+ changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
if (link->waiting_flags) {
- /* This fop needs to wait until all its flags are cleared which
- * potentially can be cleared by other xattrops that are already
- * wound*/
- ec_sleep(fop);
+ /* This fop needs to wait until all its flags are cleared which
+ * potentially can be cleared by other xattrops that are already
+ * wound*/
+ ec_sleep(fop);
} else {
- GF_ASSERT (!changed_flags);
+ GF_ASSERT(!changed_flags);
}
UNLOCK(&lock->loc.inode->lock);
@@ -1434,40 +1414,38 @@ void ec_get_size_version(ec_lock_link_t *link)
}
if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
- /* Once we know that an xattrop will be needed,
- * we try to get all available information in a
- * single call. */
- error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
- EC_VERSION_SIZE);
+ /* Once we know that an xattrop will be needed,
+ * we try to get all available information in a
+ * single call. */
+ error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
+ EC_VERSION_SIZE);
+ if (error != 0) {
+ goto out;
+ }
+
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
+ if (error == 0) {
+ error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
+ }
if (error != 0) {
goto out;
}
- if (lock->loc.inode->ia_type == IA_IFREG ||
- lock->loc.inode->ia_type == IA_INVAL) {
- error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
- if (error == 0) {
- error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
- }
- if (error != 0) {
- goto out;
- }
-
- xdata = dict_new();
- if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) {
- error = -ENOMEM;
- goto out;
- }
-
+ xdata = dict_new();
+ if (xdata == NULL || dict_set_int32(xdata, GF_GET_SIZE, 1)) {
+ error = -ENOMEM;
+ goto out;
}
+ }
}
- if (memcmp (allzero, dirty, sizeof (allzero))) {
- error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty,
- EC_VERSION_SIZE);
- if (error != 0) {
- goto out;
- }
+ if (memcmp(allzero, dirty, sizeof(allzero))) {
+ error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (error != 0) {
+ goto out;
+ }
}
fop->frame->root->uid = 0;
@@ -1495,13 +1473,13 @@ void ec_get_size_version(ec_lock_link_t *link)
loc.name = NULL;
}
- ec_xattrop (fop->frame, fop->xl, fop->mask, fop->minimum,
- ec_prepare_update_cbk, link, &loc,
- GF_XATTROP_ADD_ARRAY64, dict, xdata);
+ ec_xattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
+ ec_prepare_update_cbk, link, &loc, GF_XATTROP_ADD_ARRAY64,
+ dict, xdata);
} else {
ec_fxattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
- ec_prepare_update_cbk, link, lock->fd,
- GF_XATTROP_ADD_ARRAY64, dict, xdata);
+ ec_prepare_update_cbk, link, lock->fd,
+ GF_XATTROP_ADD_ARRAY64, dict, xdata);
}
error = 0;
@@ -1526,8 +1504,7 @@ out:
}
gf_boolean_t
-__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t *size)
+__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size)
{
ec_inode_t *ctx;
gf_boolean_t found = _gf_false;
@@ -1547,14 +1524,13 @@ out:
}
gf_boolean_t
-ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t *size)
+ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size)
{
gf_boolean_t found = _gf_false;
LOCK(&inode->lock);
{
- found = __ec_get_inode_size (fop, inode, size);
+ found = __ec_get_inode_size(fop, inode, size);
}
UNLOCK(&inode->lock);
@@ -1562,8 +1538,7 @@ ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
}
gf_boolean_t
-__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t size)
+__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size)
{
ec_inode_t *ctx;
gf_boolean_t found = _gf_false;
@@ -1590,38 +1565,37 @@ out:
}
gf_boolean_t
-ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t size)
+ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size)
{
gf_boolean_t found = _gf_false;
- LOCK (&inode->lock);
+ LOCK(&inode->lock);
{
- found = __ec_set_inode_size (fop, inode, size);
+ found = __ec_set_inode_size(fop, inode, size);
}
- UNLOCK (&inode->lock);
+ UNLOCK(&inode->lock);
return found;
}
static void
-ec_release_stripe_cache (ec_inode_t *ctx)
+ec_release_stripe_cache(ec_inode_t *ctx)
{
- ec_stripe_list_t *stripe_cache = NULL;
- ec_stripe_t *stripe = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+ ec_stripe_t *stripe = NULL;
- stripe_cache = &ctx->stripe_cache;
- while (!list_empty (&stripe_cache->lru)) {
- stripe = list_first_entry (&stripe_cache->lru, ec_stripe_t,
- lru);
- list_del (&stripe->lru);
- GF_FREE (stripe);
- }
- stripe_cache->count = 0;
- stripe_cache->max = 0;
+ stripe_cache = &ctx->stripe_cache;
+ while (!list_empty(&stripe_cache->lru)) {
+ stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru);
+ list_del(&stripe->lru);
+ GF_FREE(stripe);
+ }
+ stripe_cache->count = 0;
+ stripe_cache->max = 0;
}
-void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
+void
+ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
{
ec_inode_t *ctx;
@@ -1632,7 +1606,7 @@ void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
goto unlock;
}
- ec_release_stripe_cache (ctx);
+ ec_release_stripe_cache(ctx);
ctx->have_info = _gf_false;
ctx->have_config = _gf_false;
ctx->have_version = _gf_false;
@@ -1648,10 +1622,10 @@ unlock:
UNLOCK(&inode->lock);
}
-int32_t ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
+int32_t
+ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
ec_fop_data_t *fop = cookie;
ec_lock_link_t *link;
@@ -1675,7 +1649,8 @@ int32_t ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* Any error processing this request is ignored. In the worst case, an invalid
* or not up to date value in the iatt could cause some cache invalidation.
*/
-void ec_get_real_size(ec_lock_link_t *link)
+void
+ec_get_real_size(ec_lock_link_t *link)
{
ec_fop_data_t *fop;
dict_t *xdata;
@@ -1725,24 +1700,26 @@ ec_lock_update_fd(ec_lock_t *lock, ec_fop_data_t *fop)
}
static gf_boolean_t
-ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check)
+ec_link_has_lock_conflict(ec_lock_link_t *link, gf_boolean_t waitlist_check)
{
- ec_lock_link_t *trav_link = NULL;
+ ec_lock_link_t *trav_link = NULL;
- list_for_each_entry (trav_link, &link->lock->owners, owner_list) {
- if (ec_lock_conflict (trav_link, link))
- return _gf_true;
- }
+ list_for_each_entry(trav_link, &link->lock->owners, owner_list)
+ {
+ if (ec_lock_conflict(trav_link, link))
+ return _gf_true;
+ }
- if (!waitlist_check)
- return _gf_false;
+ if (!waitlist_check)
+ return _gf_false;
- list_for_each_entry (trav_link, &link->lock->waiting, wait_list) {
- if (ec_lock_conflict (trav_link, link))
- return _gf_true;
- }
+ list_for_each_entry(trav_link, &link->lock->waiting, wait_list)
+ {
+ if (ec_lock_conflict(trav_link, link))
+ return _gf_true;
+ }
- return _gf_false;
+ return _gf_false;
}
static void
@@ -1763,7 +1740,7 @@ ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list)
/* If the fop is not shareable, only this fop can be assigned as owner.
* Other fops will need to wait until this one finishes. */
- if (ec_link_has_lock_conflict (link, _gf_false)) {
+ if (ec_link_has_lock_conflict(link, _gf_false)) {
conflict = _gf_true;
}
@@ -1794,7 +1771,8 @@ ec_lock_apply(ec_lock_link_t *link)
ec_get_real_size(link);
}
-gf_boolean_t ec_lock_acquire(ec_lock_link_t *link);
+gf_boolean_t
+ec_lock_acquire(ec_lock_link_t *link);
static void
ec_lock_resume_shared(struct list_head *list)
@@ -1818,7 +1796,8 @@ ec_lock_resume_shared(struct list_head *list)
}
}
-void ec_lock_acquired(ec_lock_link_t *link)
+void
+ec_lock_acquired(ec_lock_link_t *link)
{
struct list_head list;
ec_lock_t *lock;
@@ -1850,8 +1829,9 @@ void ec_lock_acquired(ec_lock_link_t *link)
ec_lock_resume_shared(&list);
}
-int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
ec_lock_link_t *link = NULL;
@@ -1866,15 +1846,15 @@ int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this,
ec_lock_acquired(link);
ec_lock(fop->parent);
} else {
- gf_msg (this->name, GF_LOG_WARNING, op_errno,
- EC_MSG_PREOP_LOCK_FAILED,
- "Failed to complete preop lock");
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
+ "Failed to complete preop lock");
}
return 0;
}
-gf_boolean_t ec_lock_acquire(ec_lock_link_t *link)
+gf_boolean_t
+ec_lock_acquire(ec_lock_link_t *link)
{
ec_lock_t *lock;
ec_fop_data_t *fop;
@@ -1907,62 +1887,62 @@ gf_boolean_t ec_lock_acquire(ec_lock_link_t *link)
static ec_lock_link_t *
ec_lock_timer_cancel(xlator_t *xl, ec_lock_t *lock)
{
- ec_lock_link_t *timer_link;
+ ec_lock_link_t *timer_link;
- /* If we don't have any timer, there's nothing to cancel. */
- if (lock->timer == NULL) {
- return NULL;
- }
+ /* If we don't have any timer, there's nothing to cancel. */
+ if (lock->timer == NULL) {
+ return NULL;
+ }
- /* We are trying to access a lock that has an unlock timer active.
- * This means that the lock must be idle, i.e. no fop can be in the
- * owner, waiting or frozen lists. It also means that the lock cannot
- * have been marked as being released (this is done without timers).
- * There should only be one owner reference, but it's possible that
- * some fops are being prepared to use this lock. */
- GF_ASSERT ((lock->refs_owners == 1) &&
- list_empty(&lock->owners) && list_empty(&lock->waiting));
-
- /* We take the timer_link before cancelling the timer, since a
- * successful cancellation will destroy it. It must not be NULL
- * because it references the fop responsible for the delayed unlock
- * that we are currently trying to cancel. */
- timer_link = lock->timer->data;
- GF_ASSERT(timer_link != NULL);
-
- if (gf_timer_call_cancel(xl->ctx, lock->timer) < 0) {
- /* It's too late to avoid the execution of the timer callback.
- * Since we need to be sure that the callback has access to all
- * needed resources, we cannot resume the execution of the
- * timer fop now. This will be done in the callback. */
- timer_link = NULL;
- } else {
- /* The timer has been cancelled. The fop referenced by
- * timer_link holds the last reference. The caller is
- * responsible to release it when not needed anymore. */
- ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
- }
+ /* We are trying to access a lock that has an unlock timer active.
+ * This means that the lock must be idle, i.e. no fop can be in the
+ * owner, waiting or frozen lists. It also means that the lock cannot
+ * have been marked as being released (this is done without timers).
+ * There should only be one owner reference, but it's possible that
+ * some fops are being prepared to use this lock. */
+ GF_ASSERT((lock->refs_owners == 1) && list_empty(&lock->owners) &&
+ list_empty(&lock->waiting));
+
+ /* We take the timer_link before cancelling the timer, since a
+ * successful cancellation will destroy it. It must not be NULL
+ * because it references the fop responsible for the delayed unlock
+ * that we are currently trying to cancel. */
+ timer_link = lock->timer->data;
+ GF_ASSERT(timer_link != NULL);
+
+ if (gf_timer_call_cancel(xl->ctx, lock->timer) < 0) {
+ /* It's too late to avoid the execution of the timer callback.
+ * Since we need to be sure that the callback has access to all
+ * needed resources, we cannot resume the execution of the
+ * timer fop now. This will be done in the callback. */
+ timer_link = NULL;
+ } else {
+ /* The timer has been cancelled. The fop referenced by
+ * timer_link holds the last reference. The caller is
+ * responsible to release it when not needed anymore. */
+ ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
+ }
- /* We have two options here:
- *
- * 1. The timer has been successfully cancelled.
- *
- * This is the easiest case and we can continue with the currently
- * acquired lock.
- *
- * 2. The timer callback has already been fired.
- *
- * In this case we have not been able to cancel the timer before
- * the timer callback has been fired, but we also know that
- * lock->timer != NULL. This means that the timer callback is still
- * trying to acquire the inode mutex that we currently own. We are
- * safe until we release it. In this case we can safely clear
- * lock->timer. This will cause that the timer callback does nothing
- * once it acquires the mutex.
- */
- lock->timer = NULL;
+ /* We have two options here:
+ *
+ * 1. The timer has been successfully cancelled.
+ *
+ * This is the easiest case and we can continue with the currently
+ * acquired lock.
+ *
+ * 2. The timer callback has already been fired.
+ *
+ * In this case we have not been able to cancel the timer before
+ * the timer callback has been fired, but we also know that
+ * lock->timer != NULL. This means that the timer callback is still
+ * trying to acquire the inode mutex that we currently own. We are
+ * safe until we release it. In this case we can safely clear
+ * lock->timer. This will cause that the timer callback does nothing
+ * once it acquires the mutex.
+ */
+ lock->timer = NULL;
- return timer_link;
+ return timer_link;
}
static gf_boolean_t
@@ -1984,7 +1964,7 @@ ec_lock_assign_owner(ec_lock_link_t *link)
/* Since the link has just been prepared but it's not active yet, the
* refs_pending must be one at least (the ref owned by this link). */
- GF_ASSERT (lock->refs_pending > 0);
+ GF_ASSERT(lock->refs_pending > 0);
/* The link is not pending any more. It will be assigned to the owner,
* waiting or frozen list. */
lock->refs_pending--;
@@ -2017,7 +1997,7 @@ ec_lock_assign_owner(ec_lock_link_t *link)
* owners, or waiters(to prevent starvation).
* Otherwise we need to wait.
*/
- if (!lock->acquired || ec_link_has_lock_conflict (link, _gf_true)) {
+ if (!lock->acquired || ec_link_has_lock_conflict(link, _gf_true)) {
ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock);
list_add_tail(&link->wait_list, &lock->waiting);
@@ -2032,7 +2012,7 @@ ec_lock_assign_owner(ec_lock_link_t *link)
* reference assigned to the timer fop. In this case we simply reuse it.
* Otherwise we need to increase the number of owners. */
if (timer_link == NULL) {
- lock->refs_owners++;
+ lock->refs_owners++;
}
assigned = _gf_true;
@@ -2090,14 +2070,14 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
/* If the fop fails on any of the good bricks, it is important to mark
* it dirty and update versions right away. */
if (link->update[0] || link->update[1]) {
- if (lock->good_mask & ~(fop->good | fop->remaining)) {
- lock->release = _gf_true;
- }
+ if (lock->good_mask & ~(fop->good | fop->remaining)) {
+ lock->release = _gf_true;
+ }
}
}
if (fop->healing) {
- lock->healing = fop->healing & (fop->good | fop->remaining);
+ lock->healing = fop->healing & (fop->good | fop->remaining);
}
ec_lock_update_good(lock, fop);
@@ -2108,7 +2088,8 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
ec_lock_resume_shared(&list);
}
-void ec_lock(ec_fop_data_t *fop)
+void
+ec_lock(ec_fop_data_t *fop)
{
ec_lock_link_t *link;
@@ -2116,7 +2097,7 @@ void ec_lock(ec_fop_data_t *fop)
 * Which can result in refs == 0 for the fop, leading to a use-after-free
 * in this function when it calls ec_sleep, so do ec_sleep at the start
 * and ec_resume at the end of this function. */
- ec_sleep (fop);
+ ec_sleep(fop);
while (fop->locked < fop->lock_count) {
/* Since there are only up to 2 locks per fop, this xor will change
@@ -2186,17 +2167,16 @@ ec_lock_unfreeze(ec_lock_link_t *link)
}
}
-int32_t ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
ec_lock_link_t *link = fop->data;
if (op_ret < 0) {
- gf_msg (this->name, GF_LOG_WARNING, op_errno,
- EC_MSG_UNLOCK_FAILED,
- "entry/inode unlocking failed (%s)",
- ec_fop_name(link->fop->id));
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
+ "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id));
} else {
ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
}
@@ -2206,7 +2186,8 @@ int32_t ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-void ec_unlock_lock(ec_lock_link_t *link)
+void
+ec_unlock_lock(ec_lock_link_t *link)
{
ec_lock_t *lock;
ec_fop_data_t *fop;
@@ -2232,10 +2213,10 @@ void ec_unlock_lock(ec_lock_link_t *link)
}
}
-int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
- xlator_t * this, int32_t op_ret,
- int32_t op_errno, dict_t * xattr,
- dict_t * xdata)
+int32_t
+ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
ec_lock_link_t *link;
@@ -2247,10 +2228,9 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
ctx = lock->ctx;
if (op_ret < 0) {
- gf_msg(fop->xl->name, fop_log_level (fop->id, op_errno), op_errno,
+ gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
EC_MSG_SIZE_VERS_UPDATE_FAIL,
- "Failed to update version and size. %s",
- ec_msg_str(fop));
+ "Failed to update version and size. %s", ec_msg_str(fop));
} else {
fop->parent->good &= fop->good;
@@ -2287,14 +2267,14 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
}
void
-ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
- uint64_t size, uint64_t *dirty)
+ec_update_size_version(ec_lock_link_t *link, uint64_t *version, uint64_t size,
+ uint64_t *dirty)
{
ec_fop_data_t *fop;
ec_lock_t *lock;
ec_inode_t *ctx;
dict_t *dict = NULL;
- uintptr_t update_on = 0;
+ uintptr_t update_on = 0;
int32_t err = -ENOMEM;
fop = link->fop;
@@ -2331,11 +2311,10 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
}
if (dirty[0] || dirty[1]) {
- err = ec_dict_set_array(dict, EC_XATTR_DIRTY,
- dirty, EC_VERSION_SIZE);
- if (err != 0) {
- goto out;
- }
+ err = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (err != 0) {
+ goto out;
+ }
}
/* If config information is not known, we request it now. */
@@ -2351,13 +2330,13 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
update_on = lock->good_mask | lock->healing;
if (link->lock->fd == NULL) {
- ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
- ec_update_size_version_done, link, &link->lock->loc,
- GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
+ ec_update_size_version_done, link, &link->lock->loc,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
} else {
- ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
- ec_update_size_version_done, link, link->lock->fd,
- GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
+ ec_update_size_version_done, link, link->lock->fd,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
}
fop->frame->root->uid = fop->uid;
@@ -2374,14 +2353,12 @@ out:
ec_fop_set_error(fop, -err);
- gf_msg (fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
- "Unable to update version and size. %s",
- ec_msg_str(fop));
+ gf_msg(fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
+ "Unable to update version and size. %s", ec_msg_str(fop));
if (lock->unlock_now) {
ec_unlock_lock(fop->data);
}
-
}
gf_boolean_t
@@ -2406,34 +2383,34 @@ ec_update_info(ec_lock_link_t *link)
/* If we set the dirty flag for update fop, we have to unset it.
* If fop has failed on some bricks, leave the dirty as marked. */
if (lock->unlock_now) {
- /* Ensure that nodes are up while doing final
- * metadata update.*/
- if (!(ec->node_mask & ~lock->good_mask) &&
- !(ec->node_mask & ~ec->xl_up)) {
- if (ctx->dirty[0] != 0) {
- dirty[0] = -1;
- }
- if (ctx->dirty[1] != 0) {
- dirty[1] = -1;
- }
- /*If everything is fine and we already
- *have version xattr set on entry, there
- *is no need to update version again*/
- if (ctx->pre_version[0]) {
- version[0] = 0;
- }
- if (ctx->pre_version[1]) {
- version[1] = 0;
- }
- } else {
- link->optimistic_changelog = _gf_false;
- ec_set_dirty_flag (link, ctx, dirty);
+ /* Ensure that nodes are up while doing final
+ * metadata update. */
+ if (!(ec->node_mask & ~lock->good_mask) &&
+ !(ec->node_mask & ~ec->xl_up)) {
+ if (ctx->dirty[0] != 0) {
+ dirty[0] = -1;
+ }
+ if (ctx->dirty[1] != 0) {
+ dirty[1] = -1;
}
- memset(ctx->dirty, 0, sizeof(ctx->dirty));
+ /* If everything is fine and we already have the version xattr
+ * set on the entry, there is no need to update the version
+ * again. */
+ if (ctx->pre_version[0]) {
+ version[0] = 0;
+ }
+ if (ctx->pre_version[1]) {
+ version[1] = 0;
+ }
+ } else {
+ link->optimistic_changelog = _gf_false;
+ ec_set_dirty_flag(link, ctx, dirty);
+ }
+ memset(ctx->dirty, 0, sizeof(ctx->dirty));
}
- if ((version[0] != 0) || (version[1] != 0) ||
- (dirty[0] != 0) || (dirty[1] != 0)) {
+ if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) ||
+ (dirty[1] != 0)) {
ec_update_size_version(link, version, size, dirty);
return _gf_true;
}
@@ -2464,185 +2441,185 @@ ec_unlock_now(ec_lock_link_t *link)
void
ec_lock_release(ec_t *ec, inode_t *inode)
{
- ec_lock_t *lock;
- ec_inode_t *ctx;
- ec_lock_link_t *timer_link = NULL;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ ec_lock_link_t *timer_link = NULL;
- LOCK(&inode->lock);
+ LOCK(&inode->lock);
- ctx = __ec_inode_get(inode, ec->xl);
- if (ctx == NULL) {
- goto done;
- }
- lock = ctx->inode_lock;
- if ((lock == NULL) || !lock->acquired || lock->release) {
- goto done;
- }
+ ctx = __ec_inode_get(inode, ec->xl);
+ if (ctx == NULL) {
+ goto done;
+ }
+ lock = ctx->inode_lock;
+ if ((lock == NULL) || !lock->acquired || lock->release) {
+ goto done;
+ }
- gf_msg_debug(ec->xl->name, 0,
- "Releasing inode %p due to lock contention", inode);
+ gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
+ inode);
- /* The lock is not marked to be released, so the frozen list should be
- * empty. */
- GF_ASSERT(list_empty(&lock->frozen));
+ /* The lock is not marked to be released, so the frozen list should be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->frozen));
- timer_link = ec_lock_timer_cancel(ec->xl, lock);
+ timer_link = ec_lock_timer_cancel(ec->xl, lock);
- /* We mark the lock to be released as soon as possible. */
- lock->release = _gf_true;
+ /* We mark the lock to be released as soon as possible. */
+ lock->release = _gf_true;
done:
- UNLOCK(&inode->lock);
-
- /* If we have cancelled the timer, we need to start the unlock of the
- * inode. If there was a timer but we have been unable to cancel it
- * because it was just triggered, the timer callback will take care
- * of releasing the inode. */
- if (timer_link != NULL) {
- ec_unlock_now(timer_link);
- }
+ UNLOCK(&inode->lock);
+
+ /* If we have cancelled the timer, we need to start the unlock of the
+ * inode. If there was a timer but we have been unable to cancel it
+ * because it was just triggered, the timer callback will take care
+ * of releasing the inode. */
+ if (timer_link != NULL) {
+ ec_unlock_now(timer_link);
+ }
}
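
/* A hedged pthread-based sketch (invented names, not the ec_t internals)
 * of the shape of ec_lock_release() above: state transitions happen while
 * holding the mutex, but the heavyweight action (the actual unlock) is
 * deferred until after the mutex is dropped, avoiding re-entrancy and
 * lock-ordering problems. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct shared {
    pthread_mutex_t mtx;
    bool release;
    void *timer_link; /* non-NULL while a delayed unlock is pending */
};

static void
unlock_now(void *link)
{
    printf("unlocking %p outside the mutex\n", link);
}

static void
release_lock(struct shared *s)
{
    void *link = NULL;

    pthread_mutex_lock(&s->mtx);
    if (!s->release && s->timer_link != NULL) {
        link = s->timer_link; /* "cancel the timer" and keep its link */
        s->timer_link = NULL;
        s->release = true; /* mark for release as soon as possible */
    }
    pthread_mutex_unlock(&s->mtx);

    if (link != NULL) /* act only after the mutex has been dropped */
        unlock_now(link);
}

int
main(void)
{
    int dummy;
    struct shared s = {PTHREAD_MUTEX_INITIALIZER, false, &dummy};

    release_lock(&s);

    return 0;
}
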
-void ec_unlock_timer_add(ec_lock_link_t *link);
+void
+ec_unlock_timer_add(ec_lock_link_t *link);
void
ec_unlock_timer_del(ec_lock_link_t *link)
{
- ec_lock_t *lock;
- inode_t *inode;
- gf_boolean_t now = _gf_false;
+ ec_lock_t *lock;
+ inode_t *inode;
+ gf_boolean_t now = _gf_false;
+
+ /* If we are here, it means that the timer has expired before having
+ * been cancelled. This guarantees that 'link' is still valid because
+ * the fop that contains it must be pending (if timer cancellation in
+ * ec_lock_assign_owner() fails, the fop is left sleeping).
+ *
+ * At the same time, the fop still has a reference to the lock, so
+ * it must also be valid.
+ */
+ lock = link->lock;
+
+ /* 'lock' must have a valid inode, since the inode can only be
+ * destroyed when the lock itself is destroyed, and we hold a
+ * reference to the lock that prevents this.
+ */
+ inode = lock->loc.inode;
+
+ LOCK(&inode->lock);
+
+ if (lock->timer != NULL) {
+ ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
+
+ /* The unlock timer has expired without anyone cancelling it.
+ * This means that it shouldn't have any owner, and the waiting
+ * and frozen lists should be empty. It must have only one
+ * owner reference, though there can still be fops being
+ * prepared. */
+ GF_ASSERT(!lock->release && (lock->refs_owners == 1) &&
+ list_empty(&lock->owners) && list_empty(&lock->waiting) &&
+ list_empty(&lock->frozen));
+
+ gf_timer_call_cancel(link->fop->xl->ctx, lock->timer);
+ lock->timer = NULL;
+
+ /* Any fop being processed from now on will need to wait
+ * until the next unlock/lock cycle. */
+ lock->release = now = _gf_true;
+ }
+
+ UNLOCK(&inode->lock);
- /* If we are here, it means that the timer has expired before having
- * been cancelled. This guarantees that 'link' is still valid because
- * the fop that contains it must be pending (if timer cancellation in
- * ec_lock_assign_owner() fails, the fop is left sleeping).
+ if (now) {
+ ec_unlock_now(link);
+ } else {
+ /* The timer has been cancelled just after firing it but before
+ * getting here. This means that another fop has used the lock
+ * and everything should be handled as if this callback had
+ * not been executed. However, we still have an owner
+ * reference.
+ *
+ * We need to release our reference. If this is not the last
+ * reference (the most common case because another fop has
+ * taken another ref) we only need to decrement the counter.
+ * Otherwise we have been delayed enough so that the other fop
+ * has had time to acquire the reference, do its operation and
+ * release it. At the time of releasing it, the fop found
+ * that the ref counter was > 1 (our reference), so the delayed
+ * unlock timer wasn't started. We need to start it again if we
+ * are the last reference.
*
- * At the same time, the fop still has a reference to the lock, so
- * it must also be valid.
+ * ec_unlock_timer_add() handles both cases.
*/
- lock = link->lock;
+ ec_unlock_timer_add(link);
- /* 'lock' must have a valid inode since it can only be destroyed
- * when the lock itself is destroyed, but we have a reference to the
- * lock to avoid this.
+ /* We need to resume the fop that was waiting for the delayed
+ * unlock.
*/
- inode = lock->loc.inode;
-
- LOCK(&inode->lock);
-
- if (lock->timer != NULL) {
- ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
-
- /* The unlock timer has expired without anyone cancelling it.
- * This means that it shouldn't have any owner, and the waiting
- * and frozen lists should be empty. It must have only one
- * owner reference, but there can be fops being prepared
- * though.
- * */
- GF_ASSERT(!lock->release &&
- (lock->refs_owners == 1) &&
- list_empty(&lock->owners) &&
- list_empty(&lock->waiting) &&
- list_empty(&lock->frozen));
-
- gf_timer_call_cancel(link->fop->xl->ctx, lock->timer);
- lock->timer = NULL;
-
- /* Any fop being processed from now on, will need to wait
- * until the next unlock/lock cycle. */
- lock->release = now = _gf_true;
- }
-
- UNLOCK(&inode->lock);
-
- if (now) {
- ec_unlock_now(link);
- } else {
- /* The timer has been cancelled just after firing it but before
- * getting here. This means that another fop has used the lock
- * and everything should be handled as if this callback were
- * have not been executed. However we still have an owner
- * reference.
- *
- * We need to release our reference. If this is not the last
- * reference (the most common case because another fop has
- * taken another ref) we only need to decrement the counter.
- * Otherwise we have been delayed enough so that the other fop
- * has had time to acquire the reference, do its operation and
- * release it. At the time of releasing it, the fop did found
- * that the ref counter was > 1 (our reference), so the delayed
- * unlock timer wasn't started. We need to start it again if we
- * are the last reference.
- *
- * ec_unlock_timer_add() handles both cases.
- */
- ec_unlock_timer_add(link);
-
- /* We need to resume the fop that was waiting for the delayed
- * unlock.
- */
- ec_resume(link->fop, 0);
- }
+ ec_resume(link->fop, 0);
+ }
}
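
/* A small sketch, with an invented refcount, of the two cases the comment
 * above says ec_unlock_timer_add() handles: an early dropper only
 * decrements the counter, while the holder of the last reference re-arms
 * the delayed-unlock timer. */
#include <stdio.h>

struct lk {
    int refs;
    int timer_armed;
};

static void
unlock_timer_add(struct lk *l)
{
    if (--l->refs > 0)
        return; /* another fop still uses the lock */

    l->timer_armed = 1; /* last user: start the delayed unlock again */
}

int
main(void)
{
    struct lk l = {2, 0};

    unlock_timer_add(&l); /* refs 2 -> 1: nothing else happens */
    unlock_timer_add(&l); /* refs 1 -> 0: timer is re-armed */

    printf("refs=%d armed=%d\n", l.refs, l.timer_armed);

    return 0;
}
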
-void ec_unlock_timer_cbk(void *data)
+void
+ec_unlock_timer_cbk(void *data)
{
- ec_unlock_timer_del(data);
+ ec_unlock_timer_del(data);
}
static gf_boolean_t
ec_eager_lock_used(ec_t *ec, ec_fop_data_t *fop)
{
- /* Fops with no locks at this point mean that they are sent as sub-fops
- * of other higher level fops. In this case we simply assume that the
- * parent fop will take correct care of the eager lock. */
- if (fop->lock_count == 0) {
- return _gf_true;
- }
+ /* A fop with no locks at this point means that it was sent as a
+ * sub-fop of a higher-level fop. In this case we simply assume that
+ * the parent fop takes proper care of the eager lock. */
+ if (fop->lock_count == 0) {
+ return _gf_true;
+ }
- /* We may have more than one lock, but this only happens in the rename
- * fop, and both locks will reference an inode of the same type (a
- * directory in this case), so we only need to check the first lock. */
- if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) {
- return ec->eager_lock;
- }
+ /* We may have more than one lock, but this only happens in the rename
+ * fop, and both locks will reference an inode of the same type (a
+ * directory in this case), so we only need to check the first lock. */
+ if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) {
+ return ec->eager_lock;
+ }
- return ec->other_eager_lock;
+ return ec->other_eager_lock;
}
static uint32_t
ec_eager_lock_timeout(ec_t *ec, ec_lock_t *lock)
{
- if (lock->loc.inode->ia_type == IA_IFREG) {
- return ec->eager_lock_timeout;
- }
+ if (lock->loc.inode->ia_type == IA_IFREG) {
+ return ec->eager_lock_timeout;
+ }
- return ec->other_eager_lock_timeout;
+ return ec->other_eager_lock_timeout;
}
static gf_boolean_t
ec_lock_delay_create(ec_lock_link_t *link)
{
- struct timespec delay;
- ec_fop_data_t *fop = link->fop;
- ec_lock_t *lock = link->lock;
+ struct timespec delay;
+ ec_fop_data_t *fop = link->fop;
+ ec_lock_t *lock = link->lock;
- delay.tv_sec = ec_eager_lock_timeout(fop->xl->private, lock);
- delay.tv_nsec = 0;
- lock->timer = gf_timer_call_after(fop->xl->ctx, delay,
- ec_unlock_timer_cbk, link);
- if (lock->timer == NULL) {
- gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
- EC_MSG_UNLOCK_DELAY_FAILED,
- "Unable to delay an unlock");
+ delay.tv_sec = ec_eager_lock_timeout(fop->xl->private, lock);
+ delay.tv_nsec = 0;
+ lock->timer = gf_timer_call_after(fop->xl->ctx, delay, ec_unlock_timer_cbk,
+ link);
+ if (lock->timer == NULL) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
+ EC_MSG_UNLOCK_DELAY_FAILED, "Unable to delay an unlock");
- return _gf_false;
- }
+ return _gf_false;
+ }
- return _gf_true;
+ return _gf_true;
}
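
/* gf_timer_call_after() arms a one-shot callback inside glusterfs' own
 * timer wheel. As an analogy only (standard POSIX timers, not the gluster
 * API), the same arm-once-with-failure-check shape of
 * ec_lock_delay_create() looks like this. Link with -lrt on older glibc. */
#define _POSIX_C_SOURCE 200809L
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static void
cbk(union sigval sv)
{
    printf("fired: %s\n", (const char *)sv.sival_ptr);
}

/* Arms a one-shot timer; returns 0 on success and -1 on failure, so the
 * caller can log a warning and fall back, as ec_lock_delay_create() does. */
static int
arm_once(time_t seconds, void *data, timer_t *out)
{
    struct sigevent sev;
    struct itimerspec its;

    memset(&sev, 0, sizeof(sev));
    memset(&its, 0, sizeof(its));

    sev.sigev_notify = SIGEV_THREAD; /* run cbk() on a helper thread */
    sev.sigev_notify_function = cbk;
    sev.sigev_value.sival_ptr = data;

    its.it_value.tv_sec = seconds; /* one-shot: it_interval stays zero */

    if (timer_create(CLOCK_MONOTONIC, &sev, out) != 0)
        return -1;

    if (timer_settime(*out, 0, &its, NULL) != 0) {
        timer_delete(*out);
        return -1;
    }

    return 0;
}

int
main(void)
{
    timer_t id;

    if (arm_once(1, "delayed unlock", &id) != 0)
        fprintf(stderr, "unable to delay an unlock\n");

    sleep(2); /* give the one-shot callback time to fire */

    return 0;
}
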
-void ec_unlock_timer_add(ec_lock_link_t *link)
+void
+ec_unlock_timer_add(ec_lock_link_t *link)
{
ec_fop_data_t *fop = link->fop;
ec_lock_t *lock = link->lock;
@@ -2732,7 +2709,8 @@ void ec_unlock_timer_add(ec_lock_link_t *link)
}
}
-void ec_unlock(ec_fop_data_t *fop)
+void
+ec_unlock(ec_fop_data_t *fop)
{
int32_t i;
@@ -2741,7 +2719,8 @@ void ec_unlock(ec_fop_data_t *fop)
}
}
-void ec_flush_size_version(ec_fop_data_t * fop)
+void
+ec_flush_size_version(ec_fop_data_t *fop)
{
GF_ASSERT(fop->lock_count == 1);
ec_update_info(&fop->locks[0]);
@@ -2751,99 +2730,98 @@ static void
ec_update_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache, ec_stripe_t *stripe,
ec_fop_data_t *fop)
{
- off_t base;
-
- /* On write fops, we only update existing fragments if the write has
- * succeeded. Otherwise, we remove them from the cache. */
- if ((fop->id == GF_FOP_WRITE) && (fop->answer != NULL) &&
- (fop->answer->op_ret >= 0)) {
- base = stripe->frag_offset - fop->frag_range.first;
- base *= ec->fragments;
-
- /* We check if the stripe offset falls inside the real region
- * modified by the write fop (a write request is allowed,
- * though uncommon, to write less bytes than requested). The
- * current write fop implementation doesn't allow partial
- * writes of fragments, so if there's no error, we are sure
- * that a full stripe has been completely modified or not
- * touched at all. The value of op_ret may not be a multiple
- * of the stripe size because it depends on the requested
- * size by the user, so we update the stripe if the write has
- * modified at least one byte (meaning ec has written the full
- * stripe). */
- if (base < fop->answer->op_ret + fop->head) {
- memcpy(stripe->data, fop->vector[0].iov_base + base,
- ec->stripe_size);
- list_move_tail(&stripe->lru, &stripe_cache->lru);
-
- GF_ATOMIC_INC(ec->stats.stripe_cache.updates);
- }
- } else {
- stripe->frag_offset = -1;
- list_move (&stripe->lru, &stripe_cache->lru);
-
- GF_ATOMIC_INC(ec->stats.stripe_cache.invals);
+ off_t base;
+
+ /* On write fops, we only update existing fragments if the write has
+ * succeeded. Otherwise, we remove them from the cache. */
+ if ((fop->id == GF_FOP_WRITE) && (fop->answer != NULL) &&
+ (fop->answer->op_ret >= 0)) {
+ base = stripe->frag_offset - fop->frag_range.first;
+ base *= ec->fragments;
+
+ /* We check if the stripe offset falls inside the real region
+ * modified by the write fop (a write request is allowed,
+ * though uncommon, to write fewer bytes than requested). The
+ * current write fop implementation doesn't allow partial
+ * writes of fragments, so if there's no error, we are sure
+ * that a full stripe has been completely modified or not
+ * touched at all. The value of op_ret may not be a multiple
+ * of the stripe size because it depends on the size requested
+ * by the user, so we update the stripe if the write has
+ * modified at least one byte (meaning ec has written the full
+ * stripe). */
+ if (base < fop->answer->op_ret + fop->head) {
+ memcpy(stripe->data, fop->vector[0].iov_base + base,
+ ec->stripe_size);
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.updates);
}
+ } else {
+ stripe->frag_offset = -1;
+ list_move(&stripe->lru, &stripe_cache->lru);
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.invals);
+ }
}
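
/* A tiny worked example of the offset arithmetic in ec_update_stripe(),
 * with assumed numbers (none taken from the source): frag_offset values
 * live in fragment space, and multiplying by the data-fragment count
 * converts them into byte offsets inside the user's write buffer. */
#include <stdio.h>

int
main(void)
{
    long fragments = 4;      /* assumed data-fragment count */
    long range_first = 2048; /* first fragment offset touched by the write */
    long frag_offset = 2560; /* cached stripe's fragment offset */
    long op_ret = 4096;      /* bytes the write actually stored */
    long head = 0;           /* bytes of head padding */

    long base = (frag_offset - range_first) * fragments;

    /* base = 512 * 4 = 2048 < 4096, so this stripe was rewritten and
     * its cached copy can be refreshed from the write buffer. */
    printf("base=%ld: %s\n", base,
           (base < op_ret + head) ? "update stripe" : "leave untouched");

    return 0;
}
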
static void
-ec_update_cached_stripes (ec_fop_data_t *fop)
-{
- uint64_t first;
- uint64_t last;
- ec_stripe_t *stripe = NULL;
- ec_inode_t *ctx = NULL;
- ec_stripe_list_t *stripe_cache = NULL;
- inode_t *inode = NULL;
- struct list_head *temp;
- struct list_head sentinel;
-
- first = fop->frag_range.first;
- /* 'last' represents the first stripe not touched by the operation */
- last = fop->frag_range.last;
-
- /* If there are no modified stripes, we don't need to do anything
- * else. */
- if (last <= first) {
- return;
- }
+ec_update_cached_stripes(ec_fop_data_t *fop)
+{
+ uint64_t first;
+ uint64_t last;
+ ec_stripe_t *stripe = NULL;
+ ec_inode_t *ctx = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+ inode_t *inode = NULL;
+ struct list_head *temp;
+ struct list_head sentinel;
+
+ first = fop->frag_range.first;
+ /* 'last' represents the first stripe not touched by the operation */
+ last = fop->frag_range.last;
+
+ /* If there are no modified stripes, we don't need to do anything
+ * else. */
+ if (last <= first) {
+ return;
+ }
- if (!fop->use_fd) {
- inode = fop->loc[0].inode;
- } else {
- inode = fop->fd->inode;
- }
+ if (!fop->use_fd) {
+ inode = fop->loc[0].inode;
+ } else {
+ inode = fop->fd->inode;
+ }
- LOCK(&inode->lock);
+ LOCK(&inode->lock);
- ctx = __ec_inode_get (inode, fop->xl);
- if (ctx == NULL) {
- goto out;
- }
- stripe_cache = &ctx->stripe_cache;
-
- /* Since we'll be moving elements of the list to the tail, we might
- * end in an infinite loop. To avoid it, we insert a sentinel element
- * into the list, so that it will be used to detect when we have
- * traversed all existing elements once. */
- list_add_tail(&sentinel, &stripe_cache->lru);
- temp = stripe_cache->lru.next;
- while (temp != &sentinel) {
- stripe = list_entry(temp, ec_stripe_t, lru);
- temp = temp->next;
- if ((first <= stripe->frag_offset) &&
- (stripe->frag_offset < last)) {
- ec_update_stripe (fop->xl->private, stripe_cache,
- stripe, fop);
- }
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+ stripe_cache = &ctx->stripe_cache;
+
+ /* Since we'll be moving elements of the list to the tail, we might
+ * end up in an infinite loop. To avoid this, we insert a sentinel
+ * element into the list; it lets us detect when we have traversed
+ * all the original elements exactly once. */
+ list_add_tail(&sentinel, &stripe_cache->lru);
+ temp = stripe_cache->lru.next;
+ while (temp != &sentinel) {
+ stripe = list_entry(temp, ec_stripe_t, lru);
+ temp = temp->next;
+ if ((first <= stripe->frag_offset) && (stripe->frag_offset < last)) {
+ ec_update_stripe(fop->xl->private, stripe_cache, stripe, fop);
}
- list_del(&sentinel);
+ }
+ list_del(&sentinel);
out:
- UNLOCK(&inode->lock);
+ UNLOCK(&inode->lock);
}
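
/* A self-contained sketch of the sentinel trick above, using plain C
 * helpers instead of the kernel-style list macros: when a pass may move
 * nodes to the tail of the same list, a sentinel marks where the original
 * elements end, so every node is visited exactly once. */
#include <stdio.h>

struct node {
    struct node *prev, *next;
    int val;
};

static void
list_init(struct node *h)
{
    h->prev = h->next = h;
}

static void
list_del(struct node *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
}

static void
list_add_tail(struct node *n, struct node *h)
{
    n->prev = h->prev;
    n->next = h;
    h->prev->next = n;
    h->prev = n;
}

int
main(void)
{
    struct node head, sentinel;
    struct node a = {NULL, NULL, 1}, b = {NULL, NULL, 2}, c = {NULL, NULL, 3};
    struct node *temp;

    list_init(&head);
    list_add_tail(&a, &head);
    list_add_tail(&b, &head);
    list_add_tail(&c, &head);

    /* Everything before the sentinel is an original element; everything
     * after it was moved during this pass and must not be revisited. */
    list_add_tail(&sentinel, &head);

    temp = head.next;
    while (temp != &sentinel) {
        struct node *n = temp;

        temp = temp->next; /* advance before the node may move */

        if (n->val % 2) { /* "update": move odd nodes to the tail */
            list_del(n);
            list_add_tail(n, &head);
        }
        printf("visited %d\n", n->val);
    }
    list_del(&sentinel);

    return 0;
}
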
-void ec_lock_reuse(ec_fop_data_t *fop)
+void
+ec_lock_reuse(ec_fop_data_t *fop)
{
ec_cbk_data_t *cbk;
ec_t *ec = NULL;
@@ -2854,13 +2832,13 @@ void ec_lock_reuse(ec_fop_data_t *fop)
if (ec_eager_lock_used(ec, fop) && cbk != NULL) {
if (cbk->xdata != NULL) {
- if ((dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT,
- &count) == 0) && (count > 1)) {
+ if ((dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT, &count) ==
+ 0) &&
+ (count > 1)) {
release = _gf_true;
}
if (release) {
- gf_msg_debug (fop->xl->name, 0,
- "Lock contention detected");
+ gf_msg_debug(fop->xl->name, 0, "Lock contention detected");
}
}
} else {
@@ -2869,24 +2847,25 @@ void ec_lock_reuse(ec_fop_data_t *fop)
* the lock. */
release = _gf_true;
}
- ec_update_cached_stripes (fop);
+ ec_update_cached_stripes(fop);
for (i = 0; i < fop->lock_count; i++) {
ec_lock_next_owner(&fop->locks[i], cbk, release);
}
}
-void __ec_manager(ec_fop_data_t * fop, int32_t error)
+void
+__ec_manager(ec_fop_data_t *fop, int32_t error)
{
ec_t *ec = fop->xl->private;
do {
ec_trace("MANAGER", fop, "error=%d", error);
- if (!ec_must_wind (fop)) {
- if (ec->xl_up_count < ec->fragments) {
- error = ENOTCONN;
- }
+ if (!ec_must_wind(fop)) {
+ if (ec->xl_up_count < ec->fragments) {
+ error = ENOTCONN;
+ }
}
if (error != 0) {
@@ -2912,20 +2891,20 @@ void __ec_manager(ec_fop_data_t * fop, int32_t error)
fop->jobs = 1;
fop->state = fop->handler(fop, fop->state);
- GF_ASSERT (fop->state >= 0);
+ GF_ASSERT(fop->state >= 0);
error = ec_check_complete(fop, __ec_manager);
} while (error >= 0);
}
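
/* A minimal sketch of the manager loop's shape with invented states (the
 * real handler is per-fop): the handler returns the next state, and a
 * completion check decides whether the loop can keep running synchronously
 * or must park the fop until outstanding jobs call back into it. */
#include <assert.h>
#include <stdio.h>

enum { ST_START, ST_INIT, ST_DISPATCH, ST_END };

struct fop {
    int state;
    int jobs;
};

static int
handler(struct fop *f, int state)
{
    (void)f; /* the real handler inspects the fop; this sketch does not */

    switch (state) {
        case ST_START:
            return ST_INIT;
        case ST_INIT:
            return ST_DISPATCH;
        default:
            return ST_END;
    }
}

/* Returns >= 0 while the fop can continue synchronously, < 0 once it must
 * wait (here, simply when the final state has been reached). */
static int
check_complete(struct fop *f)
{
    return (f->state == ST_END) ? -1 : 0;
}

static void
manager(struct fop *f)
{
    int error = 0;

    do {
        f->jobs = 1; /* the manager itself counts as one job */
        f->state = handler(f, f->state);
        assert(f->state >= 0);

        error = check_complete(f);
    } while (error >= 0);
}

int
main(void)
{
    struct fop f = {ST_START, 0};

    manager(&f);
    printf("final state=%d\n", f.state);

    return 0;
}
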
-void ec_manager(ec_fop_data_t * fop, int32_t error)
+void
+ec_manager(ec_fop_data_t *fop, int32_t error)
{
GF_ASSERT(fop->jobs == 0);
GF_ASSERT(fop->winds == 0);
GF_ASSERT(fop->error == 0);
- if (fop->state == EC_STATE_START)
- {
+ if (fop->state == EC_STATE_START) {
fop->state = EC_STATE_INIT;
}