summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPoornima G <pgurusid@redhat.com>2016-07-19 15:20:09 +0530
committerKaushal M <kaushal@redhat.com>2016-08-24 21:42:50 -0700
commit9df752213e6f8a1cc9a5e875cf68ca8ef32f61db (patch)
tree1872f4e487433ff55ef001b7f680634001f461ec
parent9cd5066226770cf3c06a21757b963d315b8fe32b (diff)
gfapi: Fix IO error caused when there is consecutive graph switches
Backport of http://review.gluster.org/#/c/14722/ This is part 2 of the fix, the part 1 can be found at: http://review.gluster.org/#/c/14656/ Problem: ======= Consider a race between, __glfs_active_subvol() and graph_setup(). Lets say @TIME T1: fs->active_subvol = A fs->next_subvol = B __glfs_active_subvol() //under lock fs->mutex { .... new_subvol = fs->next_subvol //which is B .... //Start migration from A to B __glfs_first_lookup(){ .... unlock fs->mutex //@TIME T2 network fop lock fs->mutex .... } .... //migration continue on B fs->active_subvol = fs->next_subvol //which is C (explained below) .... } @Time T2, lets say in another thread, graph_setup() is called with C, note that at T2, fs->mutex is unlocked. graph_stup(C...) { lock fs->mutex .... if (fs->next_subvol) // which is B destroy subvol (fs->next_subvol) .... fs->next_subvol = C .... unlock fs->mutex } Thus at the end of this, fs->old_subvol = A; fs->active_subvol = C; fs->next_subvol = NULL; which is wrong, as B completed migration, but was destroyed by graph_setup, and C never was migrated. Solution: ========= Any new graph can be in one of the 2 states: - Picked for migration, migration in progress (fs->mip_subvol) - Not picked so far for migration (fs->next_subvol) graph_setup() updates fs->next_subvol only, __glfs_active_subvol() moves fs->next_subvol to fs->mip_subvol and fs->next_subvol = NULL atomically, and then once the migration is complete, make that the fs->active_subvol > Reviewed-on: http://review.gluster.org/14722 > Smoke: Gluster Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > Reviewed-by: Raghavendra Talur <rtalur@redhat.com> > Reviewed-by: Rajesh Joseph <rjoseph@redhat.com> > Reviewed-by: Niels de Vos <ndevos@redhat.com> BUG: 1367294 Change-Id: Ib6ff0565105c5eedb912a43da4017cd413243612 Signed-off-by: Poornima G <pgurusid@redhat.com> Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name> Reviewed-on: http://review.gluster.org/15167 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Kaushal M <kaushal@redhat.com>
-rw-r--r--api/src/glfs-internal.h12
-rw-r--r--api/src/glfs-master.c3
-rw-r--r--api/src/glfs-resolve.c14
3 files changed, 23 insertions, 6 deletions
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
index e1b8c8ac5f2..fe59a3b8eb9 100644
--- a/api/src/glfs-internal.h
+++ b/api/src/glfs-internal.h
@@ -187,8 +187,16 @@ struct glfs {
int ret;
int err;
- xlator_t *active_subvol;
- xlator_t *next_subvol;
+ xlator_t *active_subvol; /* active graph */
+ xlator_t *mip_subvol; /* graph for which migration is in
+ * progress */
+ xlator_t *next_subvol; /* Any new graph is put to
+ * next_subvol, the graph in
+ * next_subvol can either be move to
+ * mip_subvol (if any IO picks it up
+ * for migration), or be detroyed (if
+ * there is a new graph, and this was
+ * never picked for migration) */
xlator_t *old_subvol;
char *oldvolfile;
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
index b49ce2c8447..fd1f45d715f 100644
--- a/api/src/glfs-master.c
+++ b/api/src/glfs-master.c
@@ -45,7 +45,8 @@ graph_setup (struct glfs *fs, glusterfs_graph_t *graph)
{
if (new_subvol->switched ||
new_subvol == fs->active_subvol ||
- new_subvol == fs->next_subvol) {
+ new_subvol == fs->next_subvol ||
+ new_subvol == fs->mip_subvol) {
/* Spurious CHILD_UP event on old graph */
ret = 0;
goto unlock;
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
index 4c44b4f7ac8..b31507a949b 100644
--- a/api/src/glfs-resolve.c
+++ b/api/src/glfs-resolve.c
@@ -825,6 +825,13 @@ __glfs_migrate_openfds (struct glfs *fs, xlator_t *subvol)
}
+/* Note that though it appears that this function executes under fs->mutex,
+ * it is not fully executed under fs->mutex. i.e. there are functions like
+ * __glfs_first_lookup, __glfs_refresh_inode, __glfs_migrate_openfds which
+ * unlocks fs->mutex before sending any network fop, and reacquire fs->mutex
+ * once the fop is complete. Hence the variable read from fs at the start of the
+ * function need not have the same value by the end of the function.
+ */
xlator_t *
__glfs_active_subvol (struct glfs *fs)
{
@@ -835,7 +842,8 @@ __glfs_active_subvol (struct glfs *fs)
if (!fs->next_subvol)
return fs->active_subvol;
- new_subvol = fs->next_subvol;
+ new_subvol = fs->mip_subvol = fs->next_subvol;
+ fs->next_subvol = NULL;
ret = __glfs_first_lookup (fs, new_subvol);
if (ret) {
@@ -869,8 +877,8 @@ __glfs_active_subvol (struct glfs *fs)
should be atomic
*/
fs->old_subvol = fs->active_subvol;
- fs->active_subvol = fs->next_subvol;
- fs->next_subvol = NULL;
+ fs->active_subvol = fs->mip_subvol;
+ fs->mip_subvol = NULL;
if (new_cwd) {
__glfs_cwd_set (fs, new_cwd);