summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/ec/src/ec-combine.c
diff options
context:
space:
mode:
author: Xavier Hernandez <xhernandez@datalab.es>, 2015-05-20 15:17:35 +0200
committer: Pranith Kumar Karampuri <pkarampu@redhat.com>, 2015-05-27 03:25:47 -0700
commit: 3b666b40efbed157e8c5991f29b345d93b28c659 (patch)
tree: a6fb9a20bed31bbcb0e5dd2025e51d7f4ea6e257 /xlators/cluster/ec/src/ec-combine.c
parent: 5513144feb5b062b733d7514adf194429e31666f (diff)
cluster/ec: Forced unlock when lock contention is detected
EC uses an eager lock mechanism to optimize multiple read/write requests on the same entry or inode. This increases performance but can have adverse results when other clients try to access the same entry/inode.

To solve this, this patch adds functionality to detect when this happens and force an earlier release of the lock so that other clients are not blocked.

The method consists of requesting GLUSTERFS_INODELK_COUNT and GLUSTERFS_ENTRYLK_COUNT for all fops that take a lock. When this count is greater than one, the lock is marked to be released. All fops already waiting for this lock will be executed normally before releasing the lock, but new requests that also require it will be blocked and restarted after the lock has been released and reacquired again.

Another problem was that some operations correctly locked the parent of an entry when needed, but got the size and version xattrs from the entry instead of the parent. This patch solves this problem by binding all queries of size and version to each lock and replacing all entrylk calls with inodelk ones to remove concurrent updates on directory metadata. This also allows rename to correctly update source and destination directories.

Change-Id: I2df0b22bc6f407d49f3cbf0733b0720015bacfbd
BUG: 1165041
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/10852
Tested-by: NetBSD Build System
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-combine.c')
-rw-r--r-- xlators/cluster/ec/src/ec-combine.c 35
1 files changed, 16 insertions, 19 deletions
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index 9d4a18999f1..4617a0430f1 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -171,8 +171,10 @@ void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
gf_boolean_t
ec_xattr_match (dict_t *dict, char *key, data_t *value, void *arg)
{
- if (fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0)
+ if ((fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) ||
+ (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)) {
return _gf_false;
+ }
return _gf_true;
}
@@ -185,6 +187,8 @@ ec_value_ignore (char *key)
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0) ||
(strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) ||
(strncmp(key, GF_XATTR_CLRLK_CMD,
strlen (GF_XATTR_CLRLK_CMD)) == 0) ||
(strncmp(key, EC_QUOTA_PREFIX, strlen(EC_QUOTA_PREFIX)) == 0) ||
@@ -225,15 +229,9 @@ int32_t ec_dict_list(data_t ** list, int32_t * count, ec_cbk_data_t * cbk,
dict = (which == EC_COMBINE_XDATA) ? ans->xdata : ans->dict;
list[i] = dict_get(dict, key);
- if (list[i] == NULL)
- {
- gf_log(cbk->fop->xl->name, GF_LOG_ERROR, "Unexpected missing "
- "dictionary entry");
-
- return 0;
+ if (list[i] != NULL) {
+ i++;
}
-
- i++;
}
*count = i;
@@ -471,11 +469,6 @@ int32_t ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key)
return -1;
}
- if (num <= 1)
- {
- return 0;
- }
-
max = data_to_uint32(data[0]);
for (i = 1; i < num; i++)
{
@@ -507,10 +500,6 @@ int32_t ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key)
return -1;
}
- if (num <= 1) {
- return 0;
- }
-
max = data_to_uint64(data[0]);
for (i = 1; i < num; i++) {
tmp = data_to_uint64(data[i]);
@@ -630,6 +619,10 @@ int32_t ec_dict_data_combine(dict_t * dict, char * key, data_t * value,
{
return ec_dict_data_max32(data->cbk, data->which, key);
}
+ if ((strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0)) {
+ return ec_dict_data_max32(data->cbk, data->which, key);
+ }
if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
return ec_dict_data_quota(data->cbk, data->which, key);
@@ -831,6 +824,8 @@ void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
LOCK(&fop->lock);
+ fop->received |= newcbk->mask;
+
item = fop->cbk_list.prev;
list_for_each_entry(cbk, &fop->cbk_list, list)
{
@@ -868,7 +863,9 @@ void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
}
cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
- needed = fop->minimum - cbk->count - fop->winds + 1;
+ if ((fop->mask ^ fop->remaining) == fop->received) {
+ needed = fop->minimum - cbk->count;
+ }
UNLOCK(&fop->lock);