summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorSusant Palai <spalai@redhat.com>2016-11-07 12:00:13 +0530
committerRaghavendra G <rgowdapp@redhat.com>2017-04-10 00:20:03 -0400
commit656bf04955936319de4b8711debcc9931a7c778e (patch)
tree00a69c565f74cd2c39f2a3915873f34f33ed87c1 /xlators/cluster
parent5512a5723ad22dd099a57c60483eb80e5b2a14c4 (diff)
dht/rebalance: Crawler performance improvement
The job of the crawler in rebalance is to fetch files from each local subvolume and push them to migration queue if it is eligible for migration. And we do a lookup on the entries received to figure out the eligibilty. Since, the lookup done is on a local subvolume we receive linkto files and regular files as well. This requires us to do two lookups. first: do a lookup on the file to figure out whether it is a linkto file second: do a lookup on the file to figure out if it should be migrated Note: The migrator thread also does one lookup for the file before migration. Optimization: Remove the lookup done by the crawler. Offload these task to the migrator threads. For linkto file verification get the stat and xattr information from readdirp. So in total we have one lookup instead of three for each entry. Performance numbers: Create two node, two brick setup. Created 100000 files. And started rebalance. Since, there is no add-brick, no files will be migrated and we will get the crawler performance. Without patch: [root@gprfs039 ~]# grs Node Rebalanced-files size scanned failures skipped status run time in h:m:s --------- ----------- ----------- ----------- ----------- ----------- ------------ -------------- localhost 0 0Bytes 50070 0 0 completed 0:0:48 server2 0 0Bytes 49930 0 0 completed 0:0:44 volume rebalance: test1: success Total: 48 seconds WiththecurrentPatch: [root@gprfs039 mnt]# gluster v rebalance test1 status Node Rebalanced-files size scanned failures skipped status run time in h:m:s --------- ----------- ----------- ----------- ----------- ----------- ------------ -------------- localhost 0 0Bytes 50070 0 0 completed 0:0:12 server2 0 0Bytes 49930 0 0 completed 0:0:12 volume rebalance: test1: success Total: 12 seconds That's 4X speed gain. :) Updates glusterfs#155 Change-Id: Idc8e5b366e76c54aa40d698876ae62fe1630b6cc BUG: 1439571 Signed-off-by: Susant Palai <spalai@redhat.com> Reviewed-on: https://review.gluster.org/15781 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c161
1 files changed, 34 insertions, 127 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index bc6e983cd0a..84081fdaa13 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2113,6 +2113,8 @@ gf_defrag_migrate_single_file (void *opaque)
double elapsed = {0,};
struct dht_container *rebal_entry = NULL;
inode_t *inode = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2180,6 +2182,32 @@ gf_defrag_migrate_single_file (void *opaque)
goto out;
}
+ hashed_subvol = dht_subvol_get_hashed (this, &entry_loc);
+ if (!hashed_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s",
+ loc->path);
+ ret = 0;
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, entry_loc.inode);
+ if (!cached_subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s",
+ loc->path);
+
+ ret = 0;
+ goto out;
+ }
+
+ if (hashed_subvol == cached_subvol) {
+ ret = 0;
+ goto out;
+ }
+
inode = inode_link (entry_loc.inode, entry_loc.parent, entry->d_name, &iatt);
inode_unref (entry_loc.inode);
/* use the inode returned by inode_link */
@@ -2426,12 +2454,8 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
int ret = -1;
char is_linkfile = 0;
gf_dirent_t *df_entry = NULL;
- loc_t entry_loc = {0,};
dict_t *xattr_rsp = NULL;
- struct iatt iatt = {0,};
struct dht_container *tmp_container = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
ret = -1;
@@ -2447,7 +2471,7 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
ret = syncop_readdirp (conf->local_subvols[i], fd, 131072,
dir_dfmeta->offset_var[i].offset,
&(dir_dfmeta->equeue[i]),
- NULL, NULL);
+ xattr_req, NULL);
if (ret == 0) {
dir_dfmeta->offset_var[i].readdir_done = 1;
ret = 0;
@@ -2511,137 +2535,22 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
continue;
}
- loc_wipe (&entry_loc);
- ret = dht_build_child_loc (this, &entry_loc, loc,
- df_entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Child loc"
- " build failed");
- ret = -1;
- goto out;
- }
-
- if (gf_uuid_is_null (df_entry->d_stat.ia_gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_NULL,
- "%s/%s gfid not present", loc->path,
- df_entry->d_name);
- continue;
- }
-
- gf_uuid_copy (entry_loc.gfid, df_entry->d_stat.ia_gfid);
-
- if (gf_uuid_is_null (loc->gfid)) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_GFID_NULL,
- "%s/%s gfid not present", loc->path,
- df_entry->d_name);
- continue;
- }
-
- gf_uuid_copy (entry_loc.pargfid, loc->gfid);
-
- entry_loc.inode->ia_type = df_entry->d_stat.ia_type;
-
- if (xattr_rsp) {
- dict_unref (xattr_rsp);
- xattr_rsp = NULL;
- }
-
- ret = syncop_lookup (conf->local_subvols[i], &entry_loc,
- &iatt, NULL, xattr_req, &xattr_rsp);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:%s lookup failed",
- entry_loc.path);
-
- if (-ret != ENOENT && -ret != ESTALE) {
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- ret = -1;
- goto out;
- } else {
- *should_commit_hash = 0;
- continue;
- }
- }
-
- continue;
- }
-
-
- is_linkfile = check_is_linkfile (NULL, &iatt, xattr_rsp,
- conf->link_xattr_name);
+ is_linkfile = check_is_linkfile (NULL, &df_entry->d_stat,
+ df_entry->dict,
+ conf->link_xattr_name);
if (is_linkfile) {
/* No need to add linkto file to the queue for
migration. Only the actual data file need to
be checked for migration criteria.
*/
+
gf_msg_debug (this->name, 0, "Skipping linkfile"
- " %s on subvol: %s", entry_loc.path,
+ " %s on subvol: %s", df_entry->d_name,
conf->local_subvols[i]->name);
continue;
}
-
- ret = syncop_lookup (this, &entry_loc, NULL, NULL,
- NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "lookup failed for file:%s",
- entry_loc.path);
-
- if (-ret != ENOENT && -ret != ESTALE) {
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- ret = -1;
- goto out;
- } else {
- *should_commit_hash = 0;
- continue;
- }
- }
-
- continue;
- }
-
- /* if distribute is present, it will honor this key.
- * -1, ENODATA is returned if distribute is not present
- * or file doesn't have a link-file. If file has
- * link-file, the path of link-file will be the value,
- * and also that guarantees that file has to be mostly
- * migrated */
-
- hashed_subvol = dht_subvol_get_hashed (this, &entry_loc);
- if (!hashed_subvol) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "Failed to get hashed subvol for %s",
- loc->path);
- continue;
- }
-
- cached_subvol = dht_subvol_get_cached (this, entry_loc.inode);
- if (!cached_subvol) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_CACHED_SUBVOL_GET_FAILED,
- "Failed to get cached subvol for %s",
- loc->path);
-
- continue;
- }
-
- if (hashed_subvol == cached_subvol) {
- continue;
- }
-
/*Build Container Structure */
tmp_container = GF_CALLOC (1, sizeof(struct dht_container),
@@ -2702,8 +2611,6 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
}
out:
- loc_wipe (&entry_loc);
-
if (ret == 0) {
*container = tmp_container;
} else {