From 69c434432853e2ba1ee53296f05c6a54ab300d02 Mon Sep 17 00:00:00 2001 From: Raghavendra Bhat Date: Fri, 12 Jun 2015 15:12:05 +0530 Subject: libgfapi: send explicit lookups on inodes linked in readdirp Backport of http://review.gluster.org/11236 If the inode is linked via readdirp, then the consumers of gfapi which are using handles (got either in lookup or readdirp) might not send an explicit lookup on that object again (ex: NFS, samba, USS). If there is a replicate volume where the replicas of the object are not in sync, then readdirp followed by fops might lead to data being served from the subvolume which is not in sync with the latest data. And since a lookup is needed to trigger self-heal on that object, the consumers might keep getting wrong data until an explicit lookup is done. Fuse handles this situation by sending an explicit lookup by itself (fuse xlator) on those inodes which are linked via readdirp, whenever a fop comes on that inode. The same procedure is done in gfapi as well to address this situation. 
Thanks to shyam(srangana@redhat.com) for valuable inputs Change-Id: I4230fae8e0b01a95c056282b08ed30832d4804a7 BUG: 1240190 Signed-off-by: Raghavendra Bhat Reviewed-on: http://review.gluster.org/11545 Tested-by: NetBSD Build System Tested-by: Gluster Build System Reviewed-by: Shyamsundar Ranganathan Reviewed-by: Niels de Vos --- api/src/glfs-fops.c | 16 +++++++++++++++- api/src/glfs-resolve.c | 49 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 14 deletions(-) (limited to 'api') diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index e1762ae1285..b66f336338d 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -2270,6 +2270,7 @@ glfd_entry_refresh (struct glfs_fd *glfd, int plus) xlator_t *subvol = NULL; gf_dirent_t entries; gf_dirent_t old; + gf_dirent_t *entry = NULL; int ret = -1; fd_t *fd = NULL; @@ -2304,8 +2305,20 @@ glfd_entry_refresh (struct glfs_fd *glfd, int plus) &entries, NULL, NULL); DECODE_SYNCOP_ERR (ret); if (ret >= 0) { - if (plus) + if (plus) { + /** + * Set inode_needs_lookup flag before linking the + * inode. Doing it later post linkage might lead + * to a race where a fop comes after inode link + * but before setting need_lookup flag. + */ + list_for_each_entry (entry, &glfd->entries, list) { + if (entry->inode) + inode_set_need_lookup (entry->inode, THIS); + } + gf_link_inodes_from_dirent (THIS, fd->inode, &entries); + } list_splice_init (&glfd->entries, &old.list); list_splice_init (&entries.list, &glfd->entries); @@ -2314,6 +2327,7 @@ glfd_entry_refresh (struct glfs_fd *glfd, int plus) errno = 0; } + if (ret > 0) glfd->next = list_entry (glfd->entries.next, gf_dirent_t, list); diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c index 2767abf1c39..b5efcbae0e7 100644 --- a/api/src/glfs-resolve.c +++ b/api/src/glfs-resolve.c @@ -75,27 +75,44 @@ __glfs_first_lookup (struct glfs *fs, xlator_t *subvol) } +/** + * We have to check if need_lookup flag is set in both old and the new inodes. 
+ * If its set in oldinode, then directly go ahead and do an explicit lookup. + * But if its not set in the oldinode, then check if the newinode is linked + * via readdirp. If so an explicit lookup is needed on the new inode, so that + * below xlators can set their respective contexts. + */ inode_t * -glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode) +glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode, + gf_boolean_t need_lookup) { loc_t loc = {0, }; int ret = -1; struct iatt iatt = {0, }; inode_t *newinode = NULL; + gf_boolean_t lookup_needed = _gf_false; if (!oldinode) return NULL; - if (oldinode->table->xl == subvol) + if (!need_lookup && oldinode->table->xl == subvol) return inode_ref (oldinode); newinode = inode_find (subvol->itable, oldinode->gfid); - if (newinode) - return newinode; + if (!need_lookup && newinode) { + + lookup_needed = inode_needs_lookup (newinode, THIS); + if (!lookup_needed) + return newinode; + } gf_uuid_copy (loc.gfid, oldinode->gfid); - loc.inode = inode_new (subvol->itable); + if (!newinode) + loc.inode = inode_new (subvol->itable); + else + loc.inode = newinode; + if (!loc.inode) return NULL; @@ -122,14 +139,15 @@ glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode) inode_t * -__glfs_refresh_inode (struct glfs *fs, xlator_t *subvol, inode_t *inode) +__glfs_refresh_inode (struct glfs *fs, xlator_t *subvol, inode_t *inode, + gf_boolean_t need_lookup) { inode_t *newinode = NULL; fs->migration_in_progress = 1; pthread_mutex_unlock (&fs->mutex); { - newinode = glfs_refresh_inode_safe (subvol, inode); + newinode = glfs_refresh_inode_safe (subvol, inode, need_lookup); } pthread_mutex_lock (&fs->mutex); fs->migration_in_progress = 0; @@ -622,7 +640,7 @@ glfs_migrate_fd_safe (struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) } } - newinode = glfs_refresh_inode_safe (newsubvol, oldinode); + newinode = glfs_refresh_inode_safe (newsubvol, oldinode, _gf_false); if (!newinode) { gf_msg (fs->volname, GF_LOG_WARNING, 
errno, API_MSG_INODE_REFRESH_FAILED, @@ -810,7 +828,8 @@ __glfs_active_subvol (struct glfs *fs) } if (fs->cwd) { - new_cwd = __glfs_refresh_inode (fs, new_subvol, fs->cwd); + new_cwd = __glfs_refresh_inode (fs, new_subvol, fs->cwd, + _gf_false); if (!new_cwd) { char buf1[64]; @@ -904,7 +923,8 @@ int __glfs_cwd_set (struct glfs *fs, inode_t *inode) { if (inode->table->xl != fs->active_subvol) { - inode = __glfs_refresh_inode (fs, fs->active_subvol, inode); + inode = __glfs_refresh_inode (fs, fs->active_subvol, inode, + _gf_false); if (!inode) return -1; } else { @@ -948,7 +968,7 @@ __glfs_cwd_get (struct glfs *fs) return cwd; } - cwd = __glfs_refresh_inode (fs, fs->active_subvol, fs->cwd); + cwd = __glfs_refresh_inode (fs, fs->active_subvol, fs->cwd, _gf_false); return cwd; } @@ -972,12 +992,15 @@ __glfs_resolve_inode (struct glfs *fs, xlator_t *subvol, struct glfs_object *object) { inode_t *inode = NULL; + gf_boolean_t lookup_needed = _gf_false; + + lookup_needed = inode_needs_lookup (object->inode, THIS); - if (object->inode->table->xl == subvol) + if (!lookup_needed && object->inode->table->xl == subvol) return inode_ref (object->inode); inode = __glfs_refresh_inode (fs, fs->active_subvol, - object->inode); + object->inode, lookup_needed); if (!inode) return NULL; -- cgit