From eef0737ca6ae8056d906c7bff0a9280cc748270e Mon Sep 17 00:00:00 2001
From: Pranith Kumar K
Date: Mon, 22 Jul 2013 16:44:09 +0530
Subject: cluster/afr: Handle parallel hardlinks self-heal

Change-Id: Ieda11870c65edae500140b6c061f15a7b3f264f3
BUG: 986905
Signed-off-by: Pranith Kumar K
Reviewed-on: http://review.gluster.org/5370
Tested-by: Gluster Build System
Reviewed-by: Vijay Bellur
---
 tests/bugs/bug-986905.t                       | 27 +++++++++++++++++++++++++
 xlators/cluster/afr/src/afr-self-heal-entry.c | 29 +++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100755 tests/bugs/bug-986905.t

diff --git a/tests/bugs/bug-986905.t b/tests/bugs/bug-986905.t
new file mode 100755
index 000000000..0fac40fb4
--- /dev/null
+++ b/tests/bugs/bug-986905.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This script checks that hard links created while a brick is down are
+#healed properly.
+
+cleanup;
+function get_inum {
+        ls -i $1 | awk '{print $1}'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a
+TEST ln $M0/a $M0/link_a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+TEST ls -l $M0
+inum=$(get_inum $B0/${V0}0/a)
+EXPECT "$inum" get_inum $B0/${V0}0/link_a
+cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 3598f79d1..db17052cb 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1256,6 +1256,35 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
                 gf_log (this->name, GF_LOG_INFO,
                         "%s: gfid set failed",
                         impunge_local->loc.path);
+
+        /*
+         * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY:
+         *
+         * Problem:
+         * While one brick of a replica pair is down, say the user creates
+         * a file (file-A) and a hard link to it (h-file-A). After the
+         * brick comes back up, entry self-heal is attempted on the parent
+         * directory of these two files. As part of its readdir, self-heal
+         * reads both entries, file-A and h-file-A, and performs a nameless
+         * (gfid-based) lookup for each to check whether any hard link is
+         * already present on the destination brick. It finds none for
+         * either entry, so it issues a mknod for both file-A and
+         * h-file-A. As a result, file-A and h-file-A are no longer hard
+         * links of each other on that brick.
+         *
+         * Fix: (this shrinks the race window rather than closing it; the
+         * race itself is still present in posix_mknod)
+         * When mknod arrives with GLUSTERFS_INTERNAL_FOP_KEY set,
+         * posix_mknod checks whether a gfid-link already exists and, if
+         * so, performs a link() instead of a mknod(). A race remains:
+         * two parallel posix_mknod calls for the same gfid may both see
+         * that the gfid-link is absent, both proceed with mknod, and
+         * create two different files with the same gfid.
+         */
+        ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+        if (ret)
+                gf_log (this->name, GF_LOG_INFO, "%s: %s set failed",
+                        impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY);
+
         STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
                            (void *) (long) child_index,
                            priv->children[child_index],
--
cgit
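
Editor's note: the posix_mknod side of the fix that the comment above relies
on is not part of this patch. As a rough illustration of the "link to the
existing gfid handle instead of creating a new inode" idea, here is a minimal
standalone C sketch. The helper name create_or_link and the gfid_path
argument are hypothetical; the real logic lives in posix_mknod() in the
posix xlator, and gfid_path stands in for the .glusterfs/<xx>/<yy>/<gfid>
hard-link handle the brick maintains per inode.

#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* Sketch only: if a hard link for this gfid already exists on the
 * brick, link() the new name to it instead of mknod()-ing a fresh
 * inode. */
static int
create_or_link (const char *gfid_path, const char *new_name,
                mode_t mode, dev_t dev)
{
        struct stat st;

        if (stat (gfid_path, &st) == 0)
                /* gfid already present: make new_name another hard
                 * link to the same inode. */
                return link (gfid_path, new_name);

        if (errno != ENOENT)
                return -1;

        /* No gfid handle yet: create the first inode. Two racing
         * callers can both reach this point; that is the residual
         * window the comment in the patch concedes. */
        return mknod (new_name, mode, dev);
}

The check-then-act pair (stat followed by mknod) is exactly where the
remaining race lives, which is why the commit message describes the fix as
shrinking the window rather than closing it.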