diff options
| -rwxr-xr-x | tests/bugs/bug-853258.t | 45 | ||||
| -rwxr-xr-x | tests/bugs/overlap.py | 59 | ||||
| -rw-r--r-- | tests/volume.rc | 9 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 16 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 82 | 
5 files changed, 187 insertions, 24 deletions
diff --git a/tests/bugs/bug-853258.t b/tests/bugs/bug-853258.t new file mode 100755 index 00000000..c702e6f3 --- /dev/null +++ b/tests/bugs/bug-853258.t @@ -0,0 +1,45 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +mkdir -p $H0:$B0/${V0}0 +mkdir -p $H0:$B0/${V0}1 +mkdir -p $H0:$B0/${V0}2 +mkdir -p $H0:$B0/${V0}3 + +# Create and start a volume. +TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 +TEST $CLI volume start $V0 +EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status'; + +# Force assignment of initial ranges. +TEST $CLI volume rebalance $V0 fix-layout start +EXPECT_WITHIN 15 "success:" rebalance_status_field $V0 + +# Get the original values. +xattrs="" +for i in $(seq 0 2); do +	xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)" +done + +# Expand the volume and force assignment of new ranges. +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 +# Force assignment of initial ranges. +TEST $CLI volume rebalance $V0 fix-layout start +EXPECT_WITHIN 15 "success:" rebalance_status_field $V0 + +for i in $(seq 0 3); do +	xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)" +done + +overlap=$($(dirname $0)/overlap.py $xattrs) +# 2863311531 = 0xaaaaaaab = 2/3 overlap +TEST [ "$overlap" -ge 2863311531 ] + +cleanup diff --git a/tests/bugs/overlap.py b/tests/bugs/overlap.py new file mode 100755 index 00000000..15f2da47 --- /dev/null +++ b/tests/bugs/overlap.py @@ -0,0 +1,59 @@ +#!/usr/bin/python + +import sys + +def calculate_one (ov, nv): +    old_start = int(ov[18:26],16) +    old_end = int(ov[26:34],16) +    new_start = int(nv[18:26],16) +    new_end = int(nv[26:34],16) +    if (new_end < old_start) or (new_start > old_end): +        #print '%s, %s -> ZERO' % (ov, nv) +        return 0 +    all_start = max(old_start,new_start) +    all_end = min(old_end,new_end) +    #print '%s, %s -> %08x' % (ov, nv, all_end - all_start + 1) +    return all_end - all_start + 1 + +def calculate_all (values): +    total = 0 +    nv_index = len(values) / 2 +    for old_val in values[:nv_index]: +        new_val = values[nv_index] +        nv_index += 1 +        total += calculate_one(old_val,new_val) +    return total + +""" +test1_vals = [ +    '0x0000000000000000000000003fffffff',   # first quarter +    '0x0000000000000000400000007fffffff',   # second quarter +    '0x000000000000000080000000ffffffff',   # second half +    '0x00000000000000000000000055555554',   # first third +    '0x000000000000000055555555aaaaaaa9',   # second third +    '0x0000000000000000aaaaaaaaffffffff',   # last third +] + +test2_vals = [ +    '0x0000000000000000000000003fffffff',   # first quarter +    '0x0000000000000000400000007fffffff',   # second quarter +    '0x000000000000000080000000ffffffff',   # second half +    '0x00000000000000000000000055555554',   # first third +    # Next two are (incorrectly) swapped. +    '0x0000000000000000aaaaaaaaffffffff',   # last third +    '0x000000000000000055555555aaaaaaa9',   # second third +] + +print '%08x' % calculate_one(test1_vals[0],test1_vals[3]) +print '%08x' % calculate_one(test1_vals[1],test1_vals[4]) +print '%08x' % calculate_one(test1_vals[2],test1_vals[5]) +print '= %08x' % calculate_all(test1_vals) +print '%08x' % calculate_one(test2_vals[0],test2_vals[3]) +print '%08x' % calculate_one(test2_vals[1],test2_vals[4]) +print '%08x' % calculate_one(test2_vals[2],test2_vals[5]) +print '= %08x' % calculate_all(test2_vals) +""" + +if __name__ == '__main__': +    # Return decimal so bash can reason about it. +    print '%d' % calculate_all(sys.argv[1:]) diff --git a/tests/volume.rc b/tests/volume.rc index 9daf560b..2a6ada3e 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -26,8 +26,8 @@ function volume_option()          $CLI volume info $vol | egrep "^$key: " | cut -f2 -d' ';  } -function rebalance_status_completed_field { -        $CLI volume rebalance $V0 status | awk '{print $6}' | sed -n 3p +function rebalance_status_field { +        $CLI volume rebalance $1 status | sed -n '$p' | cut -d' ' -f4  }  function remove_brick_status_completed_field { @@ -194,3 +194,8 @@ function gd_is_replace_brick_completed {                  echo "N"          fi  } + +function dht_get_layout { +	local my_xa=trusted.glusterfs.dht +	getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2 +} diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 8cae5265..34a7475b 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -412,6 +412,22 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j)          layout->list[j].err    = err_swap;  } +void +dht_layout_range_swap (dht_layout_t *layout, int i, int j) +{ +        uint32_t  start_swap = 0; +        uint32_t  stop_swap = 0; + +        start_swap  = layout->list[i].start; +        stop_swap   = layout->list[i].stop; + +        layout->list[i].start  = layout->list[j].start; +        layout->list[i].stop   = layout->list[j].stop; + +        layout->list[j].start  = start_swap; +        layout->list[j].stop   = stop_swap; +} +  int64_t  dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)  { diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 8463abdb..77afde82 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -38,12 +38,20 @@ dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)  	if (old->list[o].err > 0 || new->list[n].err > 0)  		return 0; +        if (old->list[o].start == old->list[o].stop) { +                return 0; +        } + +        if (new->list[n].start == new->list[n].stop) { +                return 0; +        } +  	if ((old->list[o].start > new->list[n].stop) ||  	    (old->list[o].stop < new->list[n].start))  		return 0; -	return max (old->list[o].start, new->list[n].start) - -		min (old->list[o].stop, new->list[n].stop); +	return min (old->list[o].stop, new->list[n].stop) - +	        max (old->list[o].start, new->list[n].start) + 1;  } @@ -564,18 +572,25 @@ void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,  					dht_layout_t *new_layout);  void dht_layout_entry_swap (dht_layout_t *layout, int i, int j); +void dht_layout_range_swap (dht_layout_t *layout, int i, int j); + +/* + * It's a bit icky using local variables in a macro, but it makes the rest + * of the code a lot clearer. + */ +#define OV_ENTRY(x,y)      table[x*new->cnt+y]  void  dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,  				      dht_layout_t *new, dht_layout_t *old)  {  	int           i            = 0; -	int           k            = 0; +	int           j            = 0;  	uint32_t      curr_overlap = 0;  	uint32_t      max_overlap  = 0;  	int           max_overlap_idx = -1;  	uint32_t      overlap      = 0; - +        uint32_t     *table = NULL;  	dht_layout_sort_volname (old);  	/* Now both old_layout->list[] and new_layout->list[] @@ -584,31 +599,54 @@ dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,  	   to the same subvolumes  	*/ +        /* Build a table of overlaps between new[i] and old[j]. */ +        table = alloca(sizeof(overlap)*old->cnt*new->cnt); +        if (!table) { +                return; +        } +        memset(table,0,sizeof(overlap)*new->cnt*new->cnt); +        for (i = 0; i < new->cnt; ++i) { +                for (j = 0; j < old->cnt; ++j) { +                        OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i); +                } +        } +  	for (i = 0; i < new->cnt; i++) { -		if (new->list[i].err > 0) +		if (new->list[i].err > 0) {  			/* Subvol might be marked for decommission  			   with EINVAL, or some other serious error  			   marked with positive errno.  			*/  			continue; +                } -		curr_overlap = dht_overlap_calc (old, i, new, i); -		max_overlap = curr_overlap; -		max_overlap_idx = i; - -		/* Subvols up to `i' have already found their best -		   matches. Search only from the rest. -		*/ -		for (k = (i + 1); k < new->cnt; k++) { -			overlap = dht_overlap_calc (old, i, new, k); -			if (overlap > max_overlap) { -				max_overlap_idx = k; -				max_overlap = overlap; -			} -		} - -		if (max_overlap > curr_overlap) -			dht_layout_entry_swap (new, i, max_overlap_idx); +                max_overlap = 0; +                max_overlap_idx = i; +                for (j = (i + 1); j < new->cnt; ++j) { +                        /* Calculate the overlap now. */ +                        curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j); +                        /* Calculate the overlap after the proposed swap. */ +                        overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i); +                        /* Are we better than status quo? */ +                        if (overlap > curr_overlap) { +                                overlap -= curr_overlap; +                                /* Are we better than the previous choice? */ +                                if (overlap > max_overlap) { +                                        max_overlap = overlap; +                                        max_overlap_idx = j; +                                } +                        } +                } + +		if (max_overlap_idx != i) { + 			dht_layout_range_swap (new, i, max_overlap_idx); +                        /* Need to swap the table values too. */ +                        for (j = 0; j < old->cnt; ++j) { +                                overlap = OV_ENTRY(i,j); +                                OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j); +                                OV_ENTRY(max_overlap_idx,j) = overlap; +                        } +                }  	}  }  | 
