diff options
| author | Raghavendra G <rgowdapp@redhat.com> | 2014-08-11 10:14:18 +0530 | 
|---|---|---|
| committer | Kaleb KEITHLEY <kkeithle@redhat.com> | 2014-10-20 07:55:11 -0700 | 
| commit | fcd256faa951f459e344ced46f0cc409a5bbc147 (patch) | |
| tree | 8fbfdc6dc47b7fdfee08d80329d526dacd3df375 | |
| parent | 7a1e42e0d4be42fa8aa0c7a430b4fbc3ab509705 (diff) | |
cluster/dht: introduce locking api.
Change-Id: I41389ba91951d3e63e617aa32cd0bee848261c72
BUG: 1139998
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-on: http://review.gluster.org/8521
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Reviewed-on: http://review.gluster.org/8679
Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
| -rw-r--r-- | xlators/cluster/dht/src/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 56 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 572 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.h | 19 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht.c | 14 | 
5 files changed, 662 insertions, 1 deletions
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am index 5324d21151e..a824c7a5de3 100644 --- a/xlators/cluster/dht/src/Makefile.am +++ b/xlators/cluster/dht/src/Makefile.am @@ -21,7 +21,7 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la  switch_la_LDFLAGS = -module -avoid-version  switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = dht-common.h dht-mem-types.h \ +noinst_HEADERS = dht-common.h dht-mem-types.h dht-helper.h \  	$(top_builddir)/xlators/lib/src/libxlator.h  AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 972ba7a99d7..a8275816b5e 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -105,6 +105,16 @@ struct dht_skip_linkto_unlink {          uuid_t          hashed_gfid;  }; +typedef struct { +        xlator_t     *xl; +        loc_t         loc;     /* contains/points to inode to lock on. */ +        short         type;    /* read/write lock.                     */ +        char         *domain;  /* Only locks within a single domain +                                * contend with each other +                                */ +        gf_boolean_t  locked; +} dht_lock_t; +  struct dht_local {          int                      call_cnt;          loc_t                    loc; @@ -196,6 +206,15 @@ struct dht_local {          struct dht_skip_linkto_unlink  skip_unlink; +        struct { +                fop_inodelk_cbk_t   inodelk_cbk; +                dht_lock_t        **locks; +                int                 lk_count; + +                /* whether locking failed on _any_ of the "locks" above */ +                int                 op_ret; +                int                 op_errno; +        } lock;  };  typedef struct dht_local dht_local_t; @@ -290,6 +309,7 @@ struct dht_conf {          /* Request to filter directory entries in readdir request */          gf_boolean_t    readdir_optimize; +        struct mem_pool *lock_pool;  };  typedef struct dht_conf dht_conf_t; @@ -773,4 +793,40 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this);  int  dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict); + + +/* Acquire non-blocking inodelk on a list of xlators. + * + * @lk_array: array of lock requests lock on. + * + * @lk_count: number of locks in @lk_array + * + * @inodelk_cbk: will be called after inodelk replies are received + * + * @retval: -1 if stack_winding inodelk fails. 0 otherwise. + *          inodelk_cbk is called with appropriate error on errors. + *          On failure to acquire lock on all members of list, successful + *          locks are unlocked before invoking cbk. + */ + +int +dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array, +                         int lk_count, fop_inodelk_cbk_t inodelk_cbk); + +/* same as dht_nonblocking_inodelk, but issues sequential blocking locks on + * @lk_array directly. locks are issued on some order which remains same + * for a list of xlators (irrespective of order of xlators within list). + */ +int +dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array, +                      int lk_count, fop_inodelk_cbk_t inodelk_cbk); + +int32_t +dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count, +                    fop_inodelk_cbk_t inodelk_cbk); + +dht_lock_t * +dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type, +              const char *domain); +  #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 56008327330..88eedf10c1f 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -17,6 +17,7 @@  #include "glusterfs.h"  #include "xlator.h"  #include "dht-common.h" +#include "dht-helper.h"  int @@ -234,6 +235,164 @@ out:          return 0;  } +char * +dht_lock_asprintf (dht_lock_t *lock) +{ +        char *lk_buf                = NULL; +        char gfid[GF_UUID_BUF_SIZE] = {0, }; + +        if (lock == NULL) +                goto out; + +        uuid_utoa_r (lock->loc.gfid, gfid); + +        gf_asprintf (&lk_buf, "%s:%s", lock->xl->name, gfid); + +out: +        return lk_buf; +} + +void +dht_log_lk_array (char *name, gf_loglevel_t log_level, dht_lock_t **lk_array, +                  int count) +{ +        int   i      = 0; +        char *lk_buf = NULL; + +        if ((lk_array == NULL) || (count == 0)) +                goto out; + +        for (i = 0; i < count; i++) { +                lk_buf = dht_lock_asprintf (lk_array[i]); +                gf_log (name, log_level, "%d. %s", i, lk_buf); +                GF_FREE (lk_buf); +        } + +out: +        return; +} + +void +dht_lock_stack_destroy (call_frame_t *lock_frame) +{ +        dht_local_t *local = NULL; + +        local = lock_frame->local; + +        local->lock.locks = NULL; +        local->lock.lk_count = 0; + +        DHT_STACK_DESTROY (lock_frame); +        return; +} + +void +dht_lock_free (dht_lock_t *lock) +{ +        if (lock == NULL) +                goto out; + +        loc_wipe (&lock->loc); +        GF_FREE (lock->domain); +        mem_put (lock); + +out: +        return; +} + +void +dht_lock_array_free (dht_lock_t **lk_array, int count) +{ +        int            i       = 0; +        dht_lock_t    *lock    = NULL; + +        if (lk_array == NULL) +                goto out; + +        for (i = 0; i < count; i++) { +                lock = lk_array[i]; +                lk_array[i] = NULL; +                dht_lock_free (lock); +        } + +out: +        return; +} + +dht_lock_t * +dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type, +              const char *domain) +{ +        dht_conf_t *conf = NULL; +        dht_lock_t *lock = NULL; + +        conf = this->private; + +        lock = mem_get0 (conf->lock_pool); +        if (lock == NULL) +                goto out; + +        lock->xl = xl; +        lock->type = type; +        lock->domain = gf_strdup (domain); +        if (lock->domain == NULL) { +                dht_lock_free (lock); +                lock = NULL; +                goto out; +        } + +        /* Fill only inode and gfid. +           posix and protocol/server give preference to pargfid/basename over +           gfid/inode for resolution if all the three parameters of loc_t are +           present. I want to avoid the following hypothetical situation: + +           1. rebalance did a lookup on a dentry and got a gfid. +           2. rebalance acquires lock on loc_t which was filled with gfid and +              path (pargfid/bname) from step 1. +           3. somebody deleted and recreated the same file +           4. rename on the same path acquires lock on loc_t which now points +              to a different inode (and hence gets the lock). +           5. rebalance continues to migrate file (note that not all fops done +              by rebalance during migration are inode/gfid based Eg., unlink) +           6. rename continues. +        */ +        lock->loc.inode = inode_ref (loc->inode); +        loc_gfid (loc, lock->loc.gfid); + +out: +        return lock; +} + +int +dht_local_lock_init (call_frame_t *frame, dht_lock_t **lk_array, +                     int lk_count, fop_inodelk_cbk_t inodelk_cbk) +{ +        int          ret   = -1; +        dht_local_t *local = NULL; + +        local = frame->local; + +        if (local == NULL) { +                local = dht_local_init (frame, NULL, NULL, 0); +        } + +        if (local == NULL) { +                goto out; +        } + +        local->lock.inodelk_cbk = inodelk_cbk; +        local->lock.locks = lk_array; +        local->lock.lk_count = lk_count; + +        ret = dht_lock_order_requests (local->lock.locks, +                                       local->lock.lk_count); +        if (ret < 0) +                goto out; + +        ret = 0; +out: +        return ret; +}  void  dht_local_wipe (xlator_t *this, dht_local_t *local) @@ -281,6 +440,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)                  local->selfheal.layout = NULL;          } +        dht_lock_array_free (local->lock.locks, local->lock.lk_count); +        GF_FREE (local->lock.locks); +          GF_FREE (local->newpath);          GF_FREE (local->key); @@ -1115,3 +1277,413 @@ dht_subvol_status (dht_conf_t *conf, xlator_t *subvol)          }          return 0;  } + +void +dht_inodelk_done (call_frame_t *lock_frame) +{ +        fop_inodelk_cbk_t  inodelk_cbk = NULL; +        call_frame_t      *main_frame  = NULL; +        dht_local_t       *local       = NULL; + +        local = lock_frame->local; +        main_frame = local->main_frame; + +        local->lock.locks = NULL; +        local->lock.lk_count = 0; + +        inodelk_cbk = local->lock.inodelk_cbk; +        local->lock.inodelk_cbk = NULL; + +        inodelk_cbk (main_frame, NULL, main_frame->this, local->lock.op_ret, +                     local->lock.op_errno, NULL); + +        dht_lock_stack_destroy (lock_frame); +        return; +} + +int +dht_inodelk_cleanup_cbk (call_frame_t *frame, void *cookie, +                         xlator_t *this, int32_t op_ret, int32_t op_errno, +                         dict_t *xdata) +{ +        dht_inodelk_done (frame); +        return 0; +} + +inline int32_t +dht_lock_count (dht_lock_t **lk_array, int lk_count) +{ +        int i = 0, locked = 0; + +        if ((lk_array == NULL) || (lk_count == 0)) +                goto out; + +        for (i = 0; i < lk_count; i++) { +                if (lk_array[i]->locked) +                        locked++; +        } +out: +        return locked; +} + +void +dht_inodelk_cleanup (call_frame_t *lock_frame) +{ +        dht_lock_t  **lk_array = NULL; +        int           lk_count = 0, lk_acquired = 0; +        dht_local_t  *local    = NULL; + +        local = lock_frame->local; + +        lk_array = local->lock.locks; +        lk_count = local->lock.lk_count; + +        lk_acquired = dht_lock_count (lk_array, lk_count); +        if (lk_acquired != 0) { +                dht_unlock_inodelk (lock_frame, lk_array, lk_count, +                                    dht_inodelk_cleanup_cbk); +        } else { +                dht_inodelk_done (lock_frame); +        } + +        return; +} + +int32_t +dht_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                        int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ +        dht_local_t *local                  = NULL; +        int          lk_index               = 0, call_cnt = 0; +        char         gfid[GF_UUID_BUF_SIZE] = {0}; + +        lk_index = (long) cookie; + +        local = frame->local; +        if (op_ret < 0) { +                uuid_utoa_r (local->lock.locks[lk_index]->loc.gfid, +                             gfid); + +                gf_log (this->name, GF_LOG_WARNING, +                        "unlocking failed on %s:%s (%s)", +                        local->lock.locks[lk_index]->xl->name, +                        gfid, strerror (op_errno)); +        } else { +                local->lock.locks[lk_index]->locked = 0; +        } + +        call_cnt = dht_frame_return (frame); +        if (is_last_call (call_cnt)) { +                dht_inodelk_done (frame); +        } + +        return 0; +} + +call_frame_t * +dht_lock_frame (call_frame_t *parent_frame) +{ +        call_frame_t *lock_frame = NULL; + +        lock_frame = copy_frame (parent_frame); +        if (lock_frame == NULL) +                goto out; + +        set_lk_owner_from_ptr (&lock_frame->root->lk_owner, parent_frame->root); + +out: +        return lock_frame; +} + +int32_t +dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count, +                    fop_inodelk_cbk_t inodelk_cbk) +{ +        dht_local_t     *local      = NULL; +        struct gf_flock  flock      = {0,}; +        int              ret        = -1 , i = 0; +        call_frame_t    *lock_frame = NULL; +        int              call_cnt   = 0; + +        GF_VALIDATE_OR_GOTO ("dht-locks", frame, done); +        GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, done); +        GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, done); + +        call_cnt = dht_lock_count (lk_array, lk_count); +        if (call_cnt == 0) { +                ret = 0; +                goto done; +        } + +        lock_frame = dht_lock_frame (frame); +        if (lock_frame == NULL) { +                gf_log (frame->this->name, GF_LOG_WARNING, +                        "cannot allocate a frame, not unlocking following " +                        "locks:"); + +                dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array, +                                  lk_count); +                goto done; +        } + +        ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk); +        if (ret < 0) { +                gf_log (frame->this->name, GF_LOG_WARNING, +                        "storing locks in local failed, not unlocking " +                        "following locks:"); + +                dht_log_lk_array (frame->this->name, GF_LOG_WARNING, lk_array, +                                  lk_count); + +                goto done; +        } + +        local = lock_frame->local; +        local->main_frame = frame; +        local->call_cnt = call_cnt; + +        flock.l_type = F_UNLCK; + +        for (i = 0; i < local->lock.lk_count; i++) { +                if (!local->lock.locks[i]->locked) +                        continue; + +                STACK_WIND_COOKIE (lock_frame, dht_unlock_inodelk_cbk, +                                   (void *)(long)i, +                                   local->lock.locks[i]->xl, +                                   local->lock.locks[i]->xl->fops->inodelk, +                                   local->lock.locks[i]->domain, +                                   &local->lock.locks[i]->loc, F_SETLK, +                                   &flock, NULL); +        } + +        return 0; + +done: +        if (lock_frame) +                dht_lock_stack_destroy (lock_frame); + +        /* no locks acquired, invoke inodelk_cbk */ +        if (ret == 0) +                inodelk_cbk (frame, NULL, frame->this, 0, 0, NULL); + +        return ret; +} + +int32_t +dht_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                             int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ +        dht_local_t *local                   = NULL; +        int          lk_index               = 0, call_cnt = 0; +        char          gfid[GF_UUID_BUF_SIZE] = {0}; + +        local = frame->local; +        lk_index = (long) cookie; + +        if (op_ret == -1) { +                local->lock.op_ret = -1; +                local->lock.op_errno = op_errno; + +                if (local && local->lock.locks[lk_index]) { +                        uuid_utoa_r (local->lock.locks[lk_index]->loc.inode->gfid, +                                     gfid); + +                        gf_log (this->name, GF_LOG_DEBUG, +                                "inodelk failed on gfid: %s " +                                "subvolume: %s (%s)", gfid, +                                local->lock.locks[lk_index]->xl->name, +                                strerror (op_errno)); +                } + +                goto out; +        } + +        local->lock.locks[lk_index]->locked = _gf_true; + +out: +        call_cnt = dht_frame_return (frame); +        if (is_last_call (call_cnt)) { +                if (local->lock.op_ret < 0) { +                        dht_inodelk_cleanup (frame); +                        return 0; +                } + +                dht_inodelk_done (frame); +        } + +        return 0; +} + +int +dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array, +                         int lk_count, fop_inodelk_cbk_t inodelk_cbk) +{ +        struct gf_flock  flock      = {0,}; +        int              i          = 0, ret = 0; +        dht_local_t     *local      = NULL; +        call_frame_t    *lock_frame = NULL; + +        GF_VALIDATE_OR_GOTO ("dht-locks", frame, out); +        GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out); +        GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out); + +        lock_frame = dht_lock_frame (frame); +        if (lock_frame == NULL) +                goto out; + +        ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk); +        if (ret < 0) { +                goto out; +        } + +        local = lock_frame->local; +        local->main_frame = frame; + +        local->call_cnt = lk_count; + +        for (i = 0; i < local->lock.lk_count; i++) { +                flock.l_type = local->lock.locks[i]->type; + +                STACK_WIND_COOKIE (lock_frame, dht_nonblocking_inodelk_cbk, +                                   (void *) (long) i, +                                   local->lock.locks[i]->xl, +                                   local->lock.locks[i]->xl->fops->inodelk, +                                   local->lock.locks[i]->domain, +                                   &local->lock.locks[i]->loc, F_SETLK, +                                   &flock, NULL); +        } + +        return 0; + +out: +        if (lock_frame) +                dht_lock_stack_destroy (lock_frame); + +        return -1; +} + +int32_t +dht_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                          int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ +        int          lk_index = 0; +        dht_local_t *local    = NULL; + +        lk_index = (long) cookie; + +        local = frame->local; + +        if (op_ret == 0) { +                local->lock.locks[lk_index]->locked = _gf_true; +        } else { +                local->lock.op_ret = -1; +                local->lock.op_errno = op_errno; +                goto cleanup; +        } + +        if (lk_index == (local->lock.lk_count - 1)) { +                dht_inodelk_done (frame); +        } else { +                dht_blocking_inodelk_rec (frame, ++lk_index); +        } + +        return 0; + +cleanup: +        dht_inodelk_cleanup (frame); + +        return 0; +} + +void +dht_blocking_inodelk_rec (call_frame_t *frame, int i) +{ +        dht_local_t     *local = NULL; +        struct gf_flock  flock = {0,}; + +        local = frame->local; + +        flock.l_type = local->lock.locks[i]->type; + +        STACK_WIND_COOKIE (frame, dht_blocking_inodelk_cbk, +                           (void *) (long) i, +                           local->lock.locks[i]->xl, +                           local->lock.locks[i]->xl->fops->inodelk, +                           local->lock.locks[i]->domain, +                           &local->lock.locks[i]->loc, F_SETLKW, &flock, NULL); + +        return; +} + +int +dht_lock_request_cmp (const void *val1, const void *val2) +{ +        dht_lock_t *lock1 = NULL; +        dht_lock_t *lock2 = NULL; +        int         ret   = 0; + +        lock1 = (dht_lock_t *)val1; +        lock2 = (dht_lock_t *)val2; + +        GF_VALIDATE_OR_GOTO ("dht-locks", lock1, out); +        GF_VALIDATE_OR_GOTO ("dht-locks", lock2, out); + +        ret = strcmp (lock1->xl->name, lock2->xl->name); + +        if (ret == 0) { +                ret = uuid_compare (lock1->loc.gfid, lock2->loc.gfid); +        } + +out: +        return ret; +} + +int +dht_lock_order_requests (dht_lock_t **locks, int count) +{ +        int        ret     = -1; + +        if (!locks || !count) +                goto out; + +        qsort (locks, count, sizeof (*locks), dht_lock_request_cmp); +        ret = 0; + +out: +        return ret; +} + +int +dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array, +                      int lk_count, fop_inodelk_cbk_t inodelk_cbk) +{ +        int           ret        = -1; +        call_frame_t *lock_frame = NULL; +        dht_local_t  *local      = NULL; + +        GF_VALIDATE_OR_GOTO ("dht-locks", frame, out); +        GF_VALIDATE_OR_GOTO (frame->this->name, lk_array, out); +        GF_VALIDATE_OR_GOTO (frame->this->name, inodelk_cbk, out); + +        lock_frame = dht_lock_frame (frame); +        if (lock_frame == NULL) +                goto out; + +        ret = dht_local_lock_init (lock_frame, lk_array, lk_count, inodelk_cbk); +        if (ret < 0) { +                goto out; +        } + +        local = lock_frame->local; +        local->main_frame = frame; + +        dht_blocking_inodelk_rec (lock_frame, 0); + +        return 0; +out: +        if (lock_frame) +                dht_lock_stack_destroy (lock_frame); + +        return -1; +} diff --git a/xlators/cluster/dht/src/dht-helper.h b/xlators/cluster/dht/src/dht-helper.h new file mode 100644 index 00000000000..e3ab9c4d93b --- /dev/null +++ b/xlators/cluster/dht/src/dht-helper.h @@ -0,0 +1,19 @@ +/* +  Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ +#ifndef _DHT_HELPER_H +#define _DHT_HELPER_H + +int +dht_lock_order_requests (dht_lock_t **lk_array, int count); + +void +dht_blocking_inodelk_rec (call_frame_t *frame, int i); + +#endif diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 9d396434ed3..97bd2a0fe99 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -213,6 +213,9 @@ fini (xlator_t *this)                          GF_FREE (conf->file_layouts);                  } +                if (conf->lock_pool) +                        mem_pool_destroy (conf->lock_pool); +                  GF_FREE (conf->subvolumes);                  GF_FREE (conf->subvolume_status); @@ -483,6 +486,14 @@ init (xlator_t *this)                  goto err;          } +        conf->lock_pool = mem_pool_new (dht_lock_t, 512); +        if (!conf->lock_pool) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to create lock mem_pool, failing " +                                                "initialization"); +                goto err; +        } +          this->private = conf;          return 0; @@ -504,6 +515,9 @@ err:                  GF_FREE (conf->defrag); +                if (conf->lock_pool) +                        mem_pool_destroy (conf->lock_pool); +                  GF_FREE (conf);          }  | 
