From 82106bc92143f18ef66fbfb20301e9a331033db6 Mon Sep 17 00:00:00 2001 From: Xavi Hernandez Date: Tue, 20 Mar 2018 10:57:13 +0100 Subject: cluster/ec: fix SHD crash for null gfid's When the self-heal daemon does a full sweep, it uses readdirp to get extra stat information for each file. This information is obtained in two steps by the posix xlator: first the directory is read to get the entries and then each entry is stat()ed to get additional info. If the user removes the file between these two steps, the stat fails and the entry's stat info, including its gfid, is left empty. EC's heal daemon was using the gfid blindly, causing an assert failure when protocol/client was trying to encode the gfid. To fix the problem, a check has been added: if we detect a null gfid, we simply ignore that entry and continue healing. Backport of: > BUG: 1558016 Change-Id: I2e4acdcecd0b6951055e50d1c37d686a2186a228 BUG: 1559079 Signed-off-by: Xavi Hernandez --- xlators/cluster/ec/src/ec-heald.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index a703379a59b..65d8b5c4284 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -301,6 +301,14 @@ ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (!ec->shd.enabled) return -EBUSY; + if (gf_uuid_is_null(entry->d_stat.ia_gfid)) { + /* It's possible that an entry has been removed just after + * being seen in a directory but before getting its stat info. + * In this case we'll receive a NULL gfid here. Since the file + * doesn't exist anymore, we can safely ignore it. */ + return 0; + } + loc.parent = inode_ref (parent->inode); loc.name = entry->d_name; gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid); -- cgit