diff options
| author | Amar Tumballi <amar@gluster.com> | 2011-07-01 04:39:41 +0000 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-07-14 01:01:33 -0700 | 
| commit | 6cf06cfd4bf72b16ac7665323629d354b78b6b05 (patch) | |
| tree | 0162bec6d2c801f78c81a4af0b53e6fa23c61e46 | |
| parent | 05a1422bbd82a28ccf7fa7c17b357f8350508e6e (diff) | |
cluster/distribute: handle layout overlaps while giving a new fix
Signed-off-by: Amar Tumballi <amar@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2258 (enhance gluster volume rebalance)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2258
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 32 |
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 5 |
| -rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 319 |
3 files changed, 297 insertions, 59 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 4bc87fc2c..237a07894 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2080,7 +2080,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,          dht_layout_t *layout   = NULL;          int           i        = 0;          int           op_errno = EINVAL; -        int           flag     = 0;          int           ret      = -1;          data_t       *tmp      = NULL; @@ -2115,29 +2114,18 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,          tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);          if (tmp) { -                for (i = 0; i < layout->cnt; i++) { -                        if (layout->list[i].start == layout->list[i].stop) { -                                flag = 1; -                                break; -                        } -                } -                if ((layout->cnt < conf->subvolume_cnt) || flag) { -                        gf_log (this->name, GF_LOG_INFO, -                                "expanding layout of %s from %d to %d", -                                loc->path, layout->cnt, conf->subvolume_cnt); - -                        ret = loc_dup (loc, &local->loc); -                        if (ret == -1) { -                                op_errno = ENOMEM; -                                goto err; -                        } +                gf_log (this->name, GF_LOG_INFO, +                        "fixing the layout of %s", loc->path); -                        dht_selfheal_new_directory (frame, dht_fix_layout_cbk, -                                                    layout); -                        return 0; +                ret = loc_dup (loc, &local->loc); +                if (ret == -1) { +                        op_errno = ENOMEM; +                        goto err;                  } -                op_errno = ENOTSUP; -                goto err; + +                
dht_fix_directory_layout (frame, dht_fix_layout_cbk, +                                          layout); +                return 0;          }          local->call_cnt = layout->cnt; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 461add168..c85ba9cfc 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -318,4 +318,9 @@ int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  int dht_linkfile_recreate(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,                           xlator_t *tovol, xlator_t *fromvol, loc_t *loc); + +int dht_fix_directory_layout (call_frame_t *frame, +                              dht_selfheal_dir_cbk_t dir_cbk, +                              dht_layout_t *layout); +  #endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index e05894a5c..a20526067 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -28,6 +28,53 @@  #include "dht-common.h" +#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path)    do {       \ +                layout->list[i].start = srt;                            \ +                layout->list[i].stop  = srt + chunk - 1;                \ +                                                                        \ +                gf_log (this->name, GF_LOG_TRACE,                       \ +                        "gave fix: %u - %u on %s for %s",               \ +                        layout->list[i].start, layout->list[i].stop,    \ +                        layout->list[i].xlator->name, path);            \ +        } while (0) + + +static inline uint32_t +dht_find_overlap (int idx, int cnk_idx, uint32_t start, uint32_t stop, +                  uint32_t chunk_size) +{ +        uint32_t overlap = 0; +        uint32_t chunk_begin = 0; + +        chunk_begin = cnk_idx * chunk_size; + +        /* There is no 
chance of overlap */ +        if ((chunk_begin > stop) || +            ((chunk_begin + chunk_size) < start)) +                goto out; + +        if ((chunk_begin <= start) && +            ((chunk_begin + chunk_size) <= stop)) { +                overlap = ((chunk_begin + chunk_size) - start); +                goto out; +        } + +        if ((chunk_begin <= start) && +            ((chunk_begin + chunk_size) >= stop)) { +                overlap = (stop - start); +                goto out; +        } + +        if ((chunk_begin < stop) && +            ((chunk_begin + chunk_size) >= stop)) { +                overlap = (stop - chunk_begin); +                goto out; +        } + +out: +        return overlap; +} +  int  dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)  { @@ -144,6 +191,30 @@ err:          return 0;  } +int +dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) +{ +        dht_local_t *local = NULL; +        int          i = 0; +        int          count = 0; +        xlator_t    *this = NULL; + +        local = frame->local; +        this = frame->this; + +        gf_log (this->name, GF_LOG_DEBUG, +                "writing the new range for all subvolumes"); + +        local->call_cnt = count = layout->cnt; + +        for (i = 0; i < layout->cnt; i++) { +                dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i); + +                if (--count == 0) +                        break; +        } +        return 0; +}  int  dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) @@ -385,40 +456,204 @@ dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,          return start;  } - -void -dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc, -                                   dht_layout_t *layout) +static inline int +dht_get_layout_count (xlator_t *this, dht_layout_t *layout)  { -        xlator_t    *this = NULL; -        uint32_t     chunk = 0; -      
  int          i = 0; -        uint32_t     start = 0; -        int          cnt = 0; -        int          err = 0; -        int          start_subvol = 0; - -        this = frame->this; +        int i = 0; +        int err = 0; +        int count = 0;          for (i = 0; i < layout->cnt; i++) {                  err = layout->list[i].err;                  if (err == -1 || err == 0) {                          layout->list[i].err = -1; -                        cnt++; +                        count++;                  }          }          /* no subvolume has enough space, but can't stop directory creation */ -        if (!cnt) { +        if (!count) {                  for (i = 0; i < layout->cnt; i++) {                          err = layout->list[i].err;                          if (err == ENOSPC) {                                  layout->list[i].err = -1; -                                cnt++; +                                count++; +                        } +                } +        } + +        return count; +} + + +dht_layout_t * +dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc, +                             dht_layout_t *layout) +{ +        uint32_t      chunk        = 0; +        uint32_t      start        = 0; +        uint32_t      stop         = 0; +        uint32_t      overlap      = 0; +        uint32_t      max_overlap  = 0; +        uint32_t      chunk_begin  = 0; +        int           count        = 0; +        int           cnt          = 0; +        int           i            = 0; +        int           j            = 0; +        int           k            = 0; +        int           loop_cnt     = 0; +        int           start_subvol = 0; +        int          *fix_array    = NULL; +        xlator_t     *this         = NULL; +        dht_layout_t *new_layout   = NULL; +        dht_conf_t   *priv         = NULL; +        dht_local_t  *local        = NULL; + +        this  = frame->this; +        priv  = this->private; +       
 local = frame->local; + +        count = cnt = dht_get_layout_count (this, layout); + +        chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1); + +        start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout); + +        fix_array = GF_CALLOC (sizeof (int), layout->cnt, gf_common_mt_char); +        if (!fix_array) { +                /* No fix, use the existing layout itself */ +                goto done; +        } + +        new_layout = dht_layout_new (this, priv->subvolume_cnt); +        if (!new_layout) +                goto done; + +        for (i = 0; i < new_layout->cnt; i++) { +                /* TODO: fix this in layout_alloc() itself */ +                new_layout->list[i].err = -ENOENT; +                if (i < layout->cnt) +                        new_layout->list[i].xlator = layout->list[i].xlator; +        } + +        /* Check if there are any overlap in layout, and give the proper fix */ +        for (i = 0; i < layout->cnt; i++) { +                /* No need to fix if 'err' is not '-1' */ +                if (layout->list[i].err != -1) +                        continue; + +                /* If already existing layout is having no range, skip it */ +                start = layout->list[i].start; +                stop  = layout->list[i].stop; +                if ((stop - start) == 0) +                        continue; + +                max_overlap = 0; + +                /* 'j' is used as starting point of each chunk */ +                for (j = 1; j <= count; j++) { +                        /* if chunk is already used, don't use it again */ +                        for (k = 0; k < i; k++) +                                if (j == fix_array[k]) +                                        break; +                        if (k < i) +                                continue; + +                        overlap = dht_find_overlap (i, (j-1), start, stop, chunk); +                        if (max_overlap < overlap) { +                   
             max_overlap = overlap; +                                fix_array[i] = j; +                        } +                } + +                /* If we have any overlap, then use that itself as new +                   layout for the subvolume */ +                if (fix_array[i]) { +                        chunk_begin = chunk * (fix_array[i] - 1); +                        new_layout->list[i].err = -1; +                        DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin, +                                              chunk, cnt, loc->path); +                        /* make sure to give (max - 1) as 'stop' range, +                           if it is last chunk */ +                        if (fix_array[i] == count) +                                new_layout->list[i].stop = 0xffffffff; +                        if (--cnt == 0) +                                goto done; + +                } +        } + +        /* Now, look for layouts which are not having any overlaps +           and give it a fix */ +        for (loop_cnt = 0, i = start_subvol; loop_cnt < new_layout->cnt; +             i++, loop_cnt++) { +                if (i == new_layout->cnt) +                        i = 0; + +                /* If 'fix_array[i]' is set, the layout is already fixed. 
*/ +                if (fix_array[i]) +                        continue; + +                if (layout->list[i].err != -1) { +                        new_layout->list[i].err = layout->list[i].err; +                        continue; +                } + +                for (k = 1; k <= count; k++) { +                        for (j = 0; j < new_layout->cnt; j++) { +                                if (k == fix_array[j]) +                                        break;                          } +                        /* Didn't find any of the list begining with 'k' */ +                        if (j == new_layout->cnt) +                                break;                  } + +                fix_array[i] = k; +                chunk_begin = (k - 1) * chunk; +                new_layout->list[i].err = -1; +                DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin, chunk, cnt, +                                      loc->path); +                /* make sure to give (max - 1) as 'stop' range, +                   if it is last chunk */ +                if (k == count) +                        new_layout->list[i].stop = 0xffffffff; +                if (--cnt == 0) +                        goto done;          } +done: +        if (new_layout) { +                /* Now that the new layout has all the proper layout, change the +                   inode context */ +                dht_layout_set (this, loc->inode, new_layout); + +                /* Make sure the extra 'ref' for existing layout is removed */ +                dht_layout_unref (this, local->layout); + +                local->layout = new_layout; +        } + +        return new_layout; +} + + +void +dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc, +                                   dht_layout_t *layout) +{ +        xlator_t    *this = NULL; +        uint32_t     chunk = 0; +        int          i = 0; +        uint32_t     start = 0; +        int          cnt = 0; +        int         
 err = 0; +        int          start_subvol = 0; + +        this = frame->this; + +        cnt = dht_get_layout_count (this, layout); +          chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);          start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout); @@ -426,42 +661,32 @@ dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,          for (i = start_subvol; i < layout->cnt; i++) {                  err = layout->list[i].err;                  if (err == -1) { -                        layout->list[i].start = start; -                        layout->list[i].stop  = start + chunk - 1; - -                        start = start + chunk; - -                        gf_log (this->name, GF_LOG_TRACE, -                                "gave fix: %u - %u on %s for %s", -                                layout->list[i].start, layout->list[i].stop, -                                layout->list[i].xlator->name, loc->path); +                        DHT_SET_LAYOUT_RANGE(layout, i, start, chunk, +                                             cnt, loc->path);                          if (--cnt == 0) {                                  layout->list[i].stop = 0xffffffff; -                                break; +                                goto done;                          } +                        start += chunk;                  }          }          for (i = 0; i < start_subvol; i++) {                  err = layout->list[i].err;                  if (err == -1) { -                        layout->list[i].start = start; -                        layout->list[i].stop  = start + chunk - 1; - -                        start = start + chunk; - -                        gf_log (this->name, GF_LOG_TRACE, -                                "gave fix: %u - %u on %s for %s", -                                layout->list[i].start, layout->list[i].stop, -                                layout->list[i].xlator->name, loc->path); +                        
DHT_SET_LAYOUT_RANGE(layout, i, start, chunk, +                                             cnt, loc->path);                          if (--cnt == 0) {                                  layout->list[i].stop = 0xffffffff; -                                break; +                                goto done;                          } +                        start += chunk;                  }          } -} +done: +        return; +}  int  dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc, @@ -532,6 +757,26 @@ dht_selfheal_new_directory (call_frame_t *frame,          return 0;  } +int +dht_fix_directory_layout (call_frame_t *frame, +                          dht_selfheal_dir_cbk_t dir_cbk, +                          dht_layout_t *layout) +{ +        dht_local_t  *local = NULL; +        dht_layout_t *tmp_layout = NULL; + +        local = frame->local; + +        local->selfheal.dir_cbk = dir_cbk; +        local->selfheal.layout = dht_layout_ref (frame->this, layout); + +        /* No layout sorting required here */ +        tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout); +        dht_fix_dir_xattr (frame, &local->loc, tmp_layout); + +        return 0; +} +  int  dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,  | 
