summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2015-05-28 16:54:59 +0200
committerPranith Kumar Karampuri <pkarampu@redhat.com>2015-05-30 05:35:44 -0700
commitaf7e88ad4a3a1cd0b965fdbadb59c923e98550bc (patch)
tree76e2d201cc2468549a8c53219cfffeafab623fb3 /xlators
parent543d24312c1e2082b4f724ee233ceb410abb107b (diff)
cluster/ec: Ignore differences in non locked inodes
Backport of http://review.gluster.org/10974 When ec combines iatt structures from multiple bricks, it checks for equality in important fields. This is ok for iatt related to inodes involved in the operation that have been locked before starting execution. However some fops return iatt information from other inodes. For example a rename locks source and destination parent directories, but it also returns an iatt from the entry itself. In these cases we ignore differences in some fields to avoid false detection of inconsistencies and trigger unnecessary self-heals. Another issue is solved in this patch that caused that the real size of the file stored into the inode context was lost during self-heal. BUG: 1225796 Change-Id: I29f328a7b4895368ded859f3bae0359436c3588f Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/10983 Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/ec/src/ec-combine.c75
-rw-r--r--xlators/cluster/ec/src/ec-combine.h3
-rw-r--r--xlators/cluster/ec/src/ec-generic.c6
-rw-r--r--xlators/cluster/ec/src/ec-heal.c35
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c9
5 files changed, 100 insertions, 28 deletions
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index 4617a0430f1..6395b223c36 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -84,7 +84,7 @@ ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst,
break;
}
- if (!ec_iatt_combine(dst->iatt, src->iatt, valid)) {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, valid)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of '%s'", gf_fop_list[fop->id]);
@@ -104,23 +104,78 @@ void ec_iatt_time_merge(uint32_t * dst_sec, uint32_t * dst_nsec,
}
}
-int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count)
+static
+uint64_t
+gfid_to_ino(uuid_t gfid)
{
+ uint64_t ino = 0;
int32_t i;
+ for (i = 8; i < 16; i++) {
+ ino <<= 8;
+ ino += (uint8_t)gfid[i];
+ }
+
+ return ino;
+}
+
+static
+gf_boolean_t
+ec_iatt_is_trusted(ec_fop_data_t *fop, struct iatt *iatt)
+{
+ uint64_t ino;
+ int32_t i;
+
+ /* Only the top level fop will have fop->locks filled. */
+ while (fop->parent != NULL) {
+ fop = fop->parent;
+ }
+
+ /* Check if the iatt references an inode locked by the current fop */
+ for (i = 0; i < fop->lock_count; i++) {
+ ino = gfid_to_ino(fop->locks[i].lock->loc.inode->gfid);
+ if (iatt->ia_ino == ino) {
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count)
+{
+ int32_t i;
+ gf_boolean_t failed = _gf_false;
+
for (i = 0; i < count; i++)
{
+ /* Check for basic fields. These fields must be equal always, even if
+ * the inode is not locked because in these cases the parent inode
+ * will be locked and differences in these fields require changes in
+ * the parent directory. */
if ((dst[i].ia_ino != src[i].ia_ino) ||
- (dst[i].ia_uid != src[i].ia_uid) ||
- (dst[i].ia_gid != src[i].ia_gid) ||
(((dst[i].ia_type == IA_IFBLK) || (dst[i].ia_type == IA_IFCHR)) &&
(dst[i].ia_rdev != src[i].ia_rdev)) ||
- ((dst[i].ia_type == IA_IFREG) &&
- (dst[i].ia_size != src[i].ia_size)) ||
- (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
- st_mode_from_ia(src[i].ia_prot, src[i].ia_type)) ||
- (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0))
- {
+ (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0)) {
+ failed = _gf_true;
+ }
+ /* Check for not so stable fields. These fields can change if the
+ * inode is not locked. */
+ if (!failed && ((dst[i].ia_uid != src[i].ia_uid) ||
+ (dst[i].ia_gid != src[i].ia_gid) ||
+ ((dst[i].ia_type == IA_IFREG) &&
+ (dst[i].ia_size != src[i].ia_size)) ||
+ (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
+ st_mode_from_ia(src[i].ia_prot, src[i].ia_type)))) {
+ if (!ec_iatt_is_trusted(fop, dst)) {
+ /* If the iatt contains information from an inode that is not
+ * locked, we ignore these differences and don't care which
+ * data is returned. */
+ failed = _gf_false;
+ }
+ }
+ if (failed) {
gf_log(THIS->name, GF_LOG_WARNING,
"Failed to combine iatt (inode: %lu-%lu, links: %u-%u, "
"uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, "
diff --git a/xlators/cluster/ec/src/ec-combine.h b/xlators/cluster/ec/src/ec-combine.h
index cae2bb9274f..19a42ded706 100644
--- a/xlators/cluster/ec/src/ec-combine.h
+++ b/xlators/cluster/ec/src/ec-combine.h
@@ -20,7 +20,8 @@ typedef int32_t (* ec_combine_f)(ec_fop_data_t * fop, ec_cbk_data_t * dst,
void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
int32_t answers);
-int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count);
+int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count);
int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2);
int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
struct iovec * src_vector, int32_t src_count);
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index f50c7a70560..a455dad9f6b 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -235,8 +235,7 @@ out:
int32_t ec_combine_fsync(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 2))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_FSYNC'");
@@ -813,8 +812,7 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
int32_t ec_combine_lookup(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 2))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_LOOKUP'");
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 80725e5a9fa..af75c9551b8 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -546,10 +546,25 @@ out:
return error;
}
+int32_t ec_heal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ if (op_ret >= 0) {
+ GF_ASSERT(ec_set_inode_size(heal->fop, heal->fd->inode,
+ heal->total_size));
+ }
+
+ return 0;
+}
+
void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc,
off_t offset, size_t size)
{
struct gf_flock flock;
+ fop_inodelk_cbk_t cbk = NULL;
flock.l_type = type;
flock.l_whence = SEEK_SET;
@@ -558,22 +573,28 @@ void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc,
flock.l_pid = 0;
flock.l_owner.len = 0;
- /* Remove inode size information before unlocking it. */
- if ((type == F_UNLCK) && (heal->loc.inode != NULL)) {
- ec_clear_inode_info(heal->fop, heal->loc.inode);
+ if (type == F_UNLCK) {
+ /* Remove inode size information before unlocking it. */
+ if (fd == NULL) {
+ ec_clear_inode_info(heal->fop, heal->loc.inode);
+ } else {
+ ec_clear_inode_info(heal->fop, heal->fd->inode);
+ }
+ } else {
+ /* Otherwise use the callback to update size information. */
+ cbk = ec_heal_lock_cbk;
}
if (fd != NULL)
{
ec_finodelk(heal->fop->frame, heal->xl, heal->fop->mask,
- EC_MINIMUM_ALL, NULL, NULL, heal->xl->name, fd,
- F_SETLKW, &flock, NULL);
+ EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd, F_SETLKW,
+ &flock, NULL);
}
else
{
ec_inodelk(heal->fop->frame, heal->xl, heal->fop->mask, EC_MINIMUM_ALL,
- NULL, NULL, heal->xl->name, loc, F_SETLKW, &flock,
- NULL);
+ cbk, heal, heal->xl->name, loc, F_SETLKW, &flock, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 853d914148b..ae02e964c3d 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -936,8 +936,7 @@ out:
int32_t ec_combine_readlink(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_READLINK'");
@@ -1210,8 +1209,7 @@ int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 0;
}
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_READ'");
@@ -1493,8 +1491,7 @@ out:
int32_t ec_combine_stat(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_STAT'");