| author | Pavan Sondur <pavan@gluster.com> | 2010-09-30 02:25:31 +0000 |
|---|---|---|
| committer | Vijay Bellur <vijay@dev.gluster.com> | 2010-09-30 11:19:24 -0700 |
| commit | af18c636c44b1ea56296850e55afe0e4b2ce845c | |
| tree | 40f8470ec000b96d61b3f8d53286aa0812c9d921 | |
| parent | 760daf28898cbb8b5072551735bebee16450ba08 | |
protocol/client: cluster/afr: Support lock recovery and self heal.
Signed-off-by: Pavan Vilas Sondur <pavan@gluster.com>
Signed-off-by: Vijay Bellur <vijay@dev.gluster.com>
BUG: 865 (Add locks recovery support in GlusterFS)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=865
23 files changed, 2120 insertions, 74 deletions
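The diff below spans three layers: new lock commands in libglusterfs (glusterfs.h), AFR changes that remember which fds hold locks and on which subvolumes they were granted, and the locks/client changes that replay those locks after a reconnect using reserve locks. As orientation only, here is a standalone C sketch of the per-fd recovery sequence that the client code drives through callbacks (see client-lk.c further down). The command values are the ones the patch defines; lock_op(), struct saved_lock and recover_fd_locks() are hypothetical stand-ins rather than GlusterFS APIs, and the real flow additionally probes server-side locks with F_GETLK_FD for AFR self-heal.

```c
/* Simplified, standalone sketch of per-fd lock recovery (not GlusterFS code).
 * Only the command values below come from the patch
 * (glusterfs_lk_recovery_cmds_t / glusterfs_lk_rec_types_t). */
#include <fcntl.h>
#include <stdio.h>

enum {
        F_RESLK_LCK = 200,   /* take a reserve lock, non-blocking */
        F_RESLK_LCKW,        /* take a reserve lock, blocking */
        F_RESLK_UNLCK,       /* drop the reserve lock */
        F_GETLK_FD  = 250,   /* fetch the next lock held on an fd (self-heal) */
};

struct saved_lock {
        struct flock       flock;   /* lock as the application last held it */
        struct saved_lock *next;
};

/* Stand-in for the client's lk fop; the real client winds an RPC here
 * and continues in a callback. */
static int
lock_op (int fd, int cmd, struct flock *lk)
{
        printf ("fd=%d cmd=%d type=%d start=%lld len=%lld\n", fd, cmd,
                (int) lk->l_type, (long long) lk->l_start,
                (long long) lk->l_len);
        return 0;
}

/* Replay all locks saved for one fd after it has been re-opened. */
static int
recover_fd_locks (int fd, struct saved_lock *locks)
{
        struct flock       reserve = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
        struct saved_lock *l       = NULL;
        int                ret     = 0;

        /* 1. Reserve the fd so competing clients back off while it heals. */
        ret = lock_op (fd, F_RESLK_LCK, &reserve);
        if (ret < 0)
                return ret;   /* conflict: the patch marks such an fd bad */

        /* 2. Re-acquire each saved lock on the server. */
        for (l = locks; l; l = l->next) {
                ret = lock_op (fd, F_SETLK, &l->flock);
                if (ret < 0)
                        break;
        }

        /* 3. Release the reserve lock so normal traffic resumes. */
        reserve.l_type = F_UNLCK;
        lock_op (fd, F_RESLK_UNLCK, &reserve);

        return ret;
}

int
main (void)
{
        struct saved_lock l2 = { .flock = { .l_type = F_RDLCK, .l_whence = SEEK_SET,
                                            .l_start = 100, .l_len = 50 },
                                 .next = NULL };
        struct saved_lock l1 = { .flock = { .l_type = F_WRLCK, .l_whence = SEEK_SET,
                                            .l_start = 0, .l_len = 10 },
                                 .next = &l2 };

        return recover_fd_locks (3, &l1);
}
```

In the patch itself this sequence is asynchronous: client_reserve_lock_cbk() checks whether the reserve lock was granted and either continues with client_send_recovery_lock() or marks the fd bad, and CHILD_UP is only propagated to parents once all such fds have been processed (decrement_reopen_fd_count()).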
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index f107a8abfa4..9b5a0e99239 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -150,15 +150,28 @@ typedef enum {          GF_LK_GETLK = 0,          GF_LK_SETLK,          GF_LK_SETLKW, +        GF_LK_RESLK_LCK, +        GF_LK_RESLK_LCKW, +        GF_LK_RESLK_UNLCK,  } glusterfs_lk_cmds_t;  typedef enum {          GF_LK_F_RDLCK = 0,          GF_LK_F_WRLCK, -        GF_LK_F_UNLCK +        GF_LK_F_UNLCK, +        GF_LK_RECLK,  } glusterfs_lk_types_t; +typedef enum { +        F_RESLK_LCK = 200, +        F_RESLK_LCKW, +        F_RESLK_UNLCK, +} glusterfs_lk_recovery_cmds_t; + +typedef enum { +        F_GETLK_FD = 250, +} glusterfs_lk_rec_types_t;  typedef enum {          GF_LOCK_POSIX, diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 64e22c3229e..9d9f5d0414d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1197,6 +1197,16 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)                  fd_ctx->up_count   = priv->up_count;                  fd_ctx->down_count = priv->down_count; +                fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on), +                                               priv->child_count, +                                               gf_afr_mt_char); +                if (!fd_ctx->locked_on) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "Out of memory"); +                        ret = -ENOMEM; +                        goto unlock; +                } +                  ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);                  INIT_LIST_HEAD (&fd_ctx->entries); @@ -1426,6 +1436,9 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)                  if (fd_ctx->opened_on)                          GF_FREE (fd_ctx->opened_on); +                if (fd_ctx->locked_on) +                        GF_FREE (fd_ctx->locked_on); +                  GF_FREE (fd_ctx);          } @@ -2298,8 +2311,9 @@ int32_t  afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,              int32_t op_ret, int32_t op_errno, struct flock *lock)  { -        afr_local_t *local = NULL; -        afr_private_t *priv = NULL; +	afr_local_t *local = NULL; +	afr_private_t *priv = NULL; +        int            ret  = 0;          int child_index = -1; @@ -2339,7 +2353,18 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          } else {                  /* locking has succeeded on all nodes that are up */ -                AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, +                ret = afr_mark_locked_nodes (this, local->fd, +                                             local->cont.lk.locked_nodes); +                if (ret) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "Could not save locked nodes info in fdctx"); + +                ret = afr_save_locked_fd (this, local->fd); +                if (ret) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "Could not save locked fd"); + +		AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,                                    &local->cont.lk.ret_flock);          } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index ee53d1d7bfb..de95a6c763b 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ 
b/xlators/cluster/afr/src/afr-lk-common.c @@ -1678,3 +1678,104 @@ afr_unlock (call_frame_t *frame, xlator_t *this)          return 0;  } + +int +afr_mark_locked_nodes (xlator_t *this, fd_t *fd, +                       unsigned char *locked_nodes) +{ +        afr_private_t *priv  = NULL; +        afr_fd_ctx_t  *fdctx = NULL; +        uint64_t       tmp   = 0; +        int            ret   = 0; + +        priv = this->private; + +        afr_fd_ctx_set (this, fd); +        if (ret < 0) +                goto out; + +        ret = fd_ctx_get (fd, this, &tmp); +        fdctx = (afr_fd_ctx_t *) (long) tmp; + +        GF_ASSERT (fdctx->locked_on); + +        memcpy (fdctx->locked_on, locked_nodes, +                priv->child_count); + +out: +        return ret; +} + +static int +__is_fd_saved (xlator_t *this, fd_t *fd) +{ +        afr_locked_fd_t *locked_fd = NULL; +        afr_private_t   *priv      = NULL; +        int              found     = 0; + +        priv = this->private; + +        list_for_each_entry (locked_fd, &priv->saved_fds, list) { +                if (locked_fd->fd == fd) { +                        found = 1; +                        break; +                } +        } + +        return found; +} + +static int +__afr_save_locked_fd (xlator_t *this, fd_t *fd) +{ +        afr_private_t   *priv      = NULL; +        afr_locked_fd_t *locked_fd = NULL; +        int              ret       = 0; + +        priv = this->private; + +        locked_fd = GF_CALLOC (1, sizeof (*locked_fd), +                               gf_afr_mt_locked_fd); +        if (!locked_fd) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory"); +                ret = -1; +                goto out; +        } + +        INIT_LIST_HEAD (&locked_fd->list); + +        list_add_tail (&locked_fd->list, &priv->saved_fds); + +out: +        return ret; +} + +int +afr_save_locked_fd (xlator_t *this, fd_t *fd) +{ +        afr_private_t   *priv      = NULL; +        int              ret       = 0; + +        priv = this->private; + +        pthread_mutex_lock (&priv->mutex); +        { +                if (__is_fd_saved (this, fd)) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "fd=%p already saved", fd); +                        goto unlock; +                } + +                ret = __afr_save_locked_fd (this, fd); +                if (ret) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "fd=%p could not be saved"); +                        goto unlock; +                } +        } +unlock: +        pthread_mutex_unlock (&priv->mutex); + +        return ret; +} diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index c1a86b8275b..31a80e8f57a 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -41,6 +41,7 @@ enum gf_afr_mem_types_ {          gf_afr_mt_loc_t,          gf_afr_mt_entry_name,          gf_afr_mt_pump_priv, +        gf_afr_mt_locked_fd,          gf_afr_mt_end  };  #endif diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 09094bdbe58..69b281d973e 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -641,6 +641,9 @@ init (xlator_t *this)          priv->first_lookup = 1;          priv->root_inode = NULL; +        pthread_mutex_init (&priv->mutex, NULL); +        INIT_LIST_HEAD (&priv->saved_fds); +  	ret = 0;  out:  	return ret; 
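The AFR-side changes above introduce a mutex and a saved_fds list in afr_private_t (declared in afr.h just below) and record, via afr_lk_cbk() and afr_save_locked_fd(), every fd on which a lk call succeeded, together with the subvolumes it was granted on (fd_ctx->locked_on). A minimal standalone sketch of that bookkeeping pattern follows; the types and the singly-linked list are simplified stand-ins for fd_t, afr_private_t and GlusterFS's list_head, so this illustrates the idea rather than the actual AFR code.

```c
/* Sketch: record each fd on which a lock succeeded, once, under a mutex
 * (mirrors the intent of __is_fd_saved/afr_save_locked_fd; types simplified). */
#include <pthread.h>
#include <stdlib.h>

typedef struct saved_fd {
        void            *fd;          /* stand-in for fd_t * */
        struct saved_fd *next;
} saved_fd_t;

typedef struct {
        pthread_mutex_t  mutex;
        saved_fd_t      *saved_fds;   /* fds whose locks may need healing later */
} afr_priv_sketch_t;

/* Caller must hold priv->mutex. */
static int
__is_fd_saved (afr_priv_sketch_t *priv, void *fd)
{
        saved_fd_t *s = NULL;

        for (s = priv->saved_fds; s; s = s->next)
                if (s->fd == fd)
                        return 1;
        return 0;
}

static int
save_locked_fd (afr_priv_sketch_t *priv, void *fd)
{
        saved_fd_t *s   = NULL;
        int         ret = 0;

        pthread_mutex_lock (&priv->mutex);
        {
                if (__is_fd_saved (priv, fd))
                        goto unlock;          /* already tracked */

                s = calloc (1, sizeof (*s));
                if (!s) {
                        ret = -1;
                        goto unlock;
                }
                s->fd   = fd;
                s->next = priv->saved_fds;
                priv->saved_fds = s;
        }
unlock:
        pthread_mutex_unlock (&priv->mutex);
        return ret;
}

int
main (void)
{
        afr_priv_sketch_t priv = { PTHREAD_MUTEX_INITIALIZER, NULL };
        int               dummy_fd = 0;

        save_locked_fd (&priv, &dummy_fd);
        save_locked_fd (&priv, &dummy_fd);    /* second call is a no-op */
        return 0;
}
```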
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 26f2c989f4e..68b4a1e305e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -85,6 +85,9 @@ typedef struct _afr_private {  	struct _pump_private *pump_private; /* Set if we are loaded as pump */          int                   use_afr_in_pump; + +        pthread_mutex_t  mutex; +        struct list_head saved_fds;   /* list of fds on which locks have succeeded */  } afr_private_t;  typedef struct { @@ -262,6 +265,11 @@ typedef struct {  } afr_internal_lock_t; +typedef struct _afr_locked_fd { +        fd_t  *fd; +        struct list_head list; +} afr_locked_fd_t; +  typedef struct _afr_local {  	unsigned int call_count;  	unsigned int success_count; @@ -619,6 +627,8 @@ typedef struct {          int  hit, miss;          gf_boolean_t failed_over;          struct list_head entries; /* needed for readdir failover */ + +        unsigned char *locked_on; /* which subvolumes locks have been successful */  } afr_fd_ctx_t; @@ -655,6 +665,13 @@ int32_t  afr_notify (xlator_t *this, int32_t event,              void *data, ...); +int +afr_save_locked_fd (xlator_t *this, fd_t *fd); + +int +afr_mark_locked_nodes (xlator_t *this, fd_t *fd, +                       unsigned char *locked_nodes); +  void  afr_set_lk_owner (call_frame_t *frame, xlator_t *this); diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index d10b874befb..53dd3aa5da5 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -3,7 +3,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features  locks_la_LDFLAGS = -module -avoidversion -locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c +locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c  locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la   noinst_HEADERS = locks.h common.h locks-mem-types.h diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index b34cd97813a..9f2d11304df 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -103,6 +103,12 @@ fd_to_fdnum (fd_t *fd)          return ((unsigned long) fd);  } +fd_t * +fd_from_fdnum (posix_lock_t *lock) +{ +        return ((fd_t *) lock->fd_num); +} +  int  __pl_inode_is_empty (pl_inode_t *pl_inode)  { @@ -441,6 +447,9 @@ pl_inode_get (xlator_t *this, inode_t *inode)  	INIT_LIST_HEAD (&pl_inode->dom_list);  	INIT_LIST_HEAD (&pl_inode->ext_list);  	INIT_LIST_HEAD (&pl_inode->rw_list); +	INIT_LIST_HEAD (&pl_inode->reservelk_list); +	INIT_LIST_HEAD (&pl_inode->blocked_reservelks); +        INIT_LIST_HEAD (&pl_inode->blocked_calls);  	inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index d707294475f..c7d817f8da0 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -116,4 +116,18 @@ pl_trace_release (xlator_t *this, fd_t *fd);  unsigned long  fd_to_fdnum (fd_t *fd); +fd_t * +fd_from_fdnum (posix_lock_t *lock); + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, +                  int can_block); +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2); + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, +                     posix_lock_t *lock, int can_block); +int +pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);  #endif 
/* __COMMON_H__ */ diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h index cf50240863b..f441f95cfa9 100644 --- a/xlators/features/locks/src/locks-mem-types.h +++ b/xlators/features/locks/src/locks-mem-types.h @@ -33,6 +33,7 @@ enum gf_locks_mem_types_ {          gf_locks_mt_pl_rw_req_t,          gf_locks_mt_posix_locks_private_t,          gf_locks_mt_pl_local_t, +        gf_locks_mt_pl_fdctx_t,          gf_locks_mt_end  };  #endif diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 60474615e5f..483e3e6669e 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -125,6 +125,9 @@ struct __pl_inode {          struct list_head dom_list;       /* list of domains */          struct list_head ext_list;       /* list of fcntl locks */          struct list_head rw_list;        /* list of waiting r/w requests */ +        struct list_head reservelk_list;        /* list of reservelks */ +        struct list_head blocked_reservelks;        /* list of blocked reservelks */ +        struct list_head blocked_calls;  /* List of blocked lock calls while a reserve is held*/          int              mandatory;      /* if mandatory locking is enabled */          inode_t          *refkeeper;     /* hold refs on an inode while locks are @@ -150,4 +153,8 @@ typedef struct {          gf_boolean_t   posixlk_count_req;  } pl_local_t; +typedef struct { +        struct list_head locks_list; +} pl_fdctx_t; +  #endif /* __POSIX_LOCKS_H__ */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 165280265ad..f085594243c 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -741,6 +741,194 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,          return 0;  } +static int +__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd) +{ +        int           found = 0; +        posix_lock_t *l     = NULL; + +        list_for_each_entry (l, &pl_inode->ext_list, list) { +                if ((l->fd_num == fd_to_fdnum(fd))) { +                        found = 1; +                        break; +                } +        } + +        return found; +} + +static posix_lock_t * +lock_dup (posix_lock_t *lock) +{ +        posix_lock_t *new_lock = NULL; + +        new_lock = new_posix_lock (&lock->user_flock, lock->transport, +                                   lock->client_pid, lock->owner, +                                   (fd_t *)lock->fd_num); +        return new_lock; +} + +static int +__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, +                      pl_fdctx_t *fdctx) +{ +        posix_lock_t *l        = NULL; +        posix_lock_t *duplock = NULL; +        int ret = 0; + +        fdctx = GF_CALLOC (1, sizeof (*fdctx), +                           gf_locks_mt_pl_fdctx_t); +        if (!fdctx) { +                ret = -1; +                goto out; +        } + +        INIT_LIST_HEAD (&fdctx->locks_list); + +        list_for_each_entry (l, &pl_inode->ext_list, list) { +                if ((l->fd_num == fd_to_fdnum(fd))) { +                        duplock = lock_dup (l); +                        if (!duplock) { +                                gf_log (THIS->name, GF_LOG_DEBUG, +                                        "Out of memory"); +                                ret = -1; +                                break; +                        } + +                        list_add_tail (&duplock->list, 
&fdctx->locks_list); +                } +        } + +out: +        return ret; +} + +static int +__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, +                      pl_fdctx_t *fdctx) +{ +        int ret = 0; + +        ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx); +        if (ret) +                goto out; + +        ret = fd_ctx_set (fd, THIS, (uint64_t) (unsigned long)&fdctx); +        if (ret) +                gf_log (THIS->name, GF_LOG_DEBUG, +                        "Failed to set fdctx"); +out: +        return ret; + +} + +static void +pl_mark_eol_lock (posix_lock_t *lock) +{ +        lock->user_flock.l_type = GF_LK_RECLK; +        return; +} + +static posix_lock_t * +__get_next_fdctx_lock (pl_fdctx_t *fdctx) +{ +        posix_lock_t *lock = NULL; + +        GF_ASSERT (fdctx); + +        if (list_empty (&fdctx->locks_list)) { +                gf_log (THIS->name, GF_LOG_DEBUG, +                        "fdctx lock list empty"); +                goto out; +        } + +        lock = list_entry (&fdctx->locks_list, typeof (*lock), +                           list); + +        GF_ASSERT (lock); + +        list_del_init (&lock->list); + +out: +        return lock; +} + +static int +__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock) +{ +        posix_lock_t *lock  = NULL; +        int           ret   = 0; + +        GF_ASSERT (fdctx); + +        lock = __get_next_fdctx_lock (fdctx); +        if (!lock) { +                gf_log (THIS->name, GF_LOG_DEBUG, +                        "marking EOL in reqlock"); +                pl_mark_eol_lock (reqlock); +                goto out; +        } + +        reqlock->user_flock = lock->user_flock; + +out: +        if (lock) +                __destroy_lock (lock); + +        return ret; +} +static int +pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode, +             fd_t *fd, posix_lock_t *reqlock) +{ +        uint64_t    tmp   = 0; +        pl_fdctx_t *fdctx = NULL; +        int         ret   = 0; + +        pthread_mutex_lock (&pl_inode->mutex); +        { +                if (!__fd_has_locks (pl_inode, fd)) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "fd=%p has no active locks", fd); +                        ret = 0; +                        goto unlock; +                } + +                gf_log (this->name, GF_LOG_DEBUG, +                        "There are active locks on fd"); + +                ret = fd_ctx_get (fd, this, &tmp); +                fdctx = (pl_fdctx_t *) tmp; +                if (ret) { +                        gf_log (this->name, GF_LOG_TRACE, +                                "no fdctx -> copying all locks on fd"); + +                        ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx); +                        if (ret) { +                                gf_log (this->name, GF_LOG_ERROR, +                                        "Out of memory"); +                                goto unlock; +                        } + +                        ret = __set_next_lock_fd (fdctx, reqlock); + +                } else { +                        gf_log (this->name, GF_LOG_TRACE, +                                "fdctx present -> returning the next lock"); +                        ret = __set_next_lock_fd (fdctx, reqlock); +                        if (ret) { +                                gf_log (this->name, GF_LOG_DEBUG, +                                        "could not get next lock of fd"); +                                goto unlock; +           
             } +                } +        } + +unlock: +        pthread_mutex_unlock (&pl_inode->mutex); +        return ret; + +}  int  pl_lk (call_frame_t *frame, xlator_t *this, @@ -791,6 +979,68 @@ pl_lk (call_frame_t *frame, xlator_t *this,          switch (cmd) { +        case F_RESLK_LCKW: +                can_block = 1; + +                /* fall through */ +        case F_RESLK_LCK: +                memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); +                reqlock->frame = frame; +                reqlock->this = this; + +                ret = pl_reserve_setlk (this, pl_inode, reqlock, +                                        can_block); +                if (ret < 0) { +                        if (can_block) +                                goto out; + +                        op_ret = -1; +                        op_errno = -ret; +                        __destroy_lock (reqlock); +                        goto unwind; +                } +                /* Finally a getlk and return the call */ +                conf = pl_getlk (pl_inode, reqlock); +                if (conf) +                        posix_lock_to_flock (conf, flock); +                break; + +        case F_RESLK_UNLCK: +                reqlock->frame = frame; +                reqlock->this = this; +                ret = pl_reserve_unlock (this, pl_inode, reqlock); +                if (ret < 0) { +                        op_ret = -1; +                        op_errno = -ret; +                } +                __destroy_lock (reqlock); +                goto unwind; + +                break; + +        case F_GETLK_FD: +                reqlock->frame = frame; +                reqlock->this = this; +                ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); +                GF_ASSERT (ret >= 0); + +                ret = pl_getlk_fd (this, pl_inode, fd, reqlock); +                if (ret < 0) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "getting locks on fd failed"); +                        op_ret = -1; +                        op_errno = ENOLCK; +                        goto unwind; +                } + +                gf_log (this->name, GF_LOG_TRACE, +                        "Replying with a lock on fd for healing"); + +                posix_lock_to_flock (reqlock, flock); +                __destroy_lock (reqlock); + +                break; +  #if F_GETLK != F_GETLK64          case F_GETLK64:  #endif @@ -816,6 +1066,12 @@ pl_lk (call_frame_t *frame, xlator_t *this,  #endif          case F_SETLK:                  memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); +                ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); +                if (ret < 0) { +                        gf_log (this->name, GF_LOG_TRACE, +                                "Lock blocked due to conflicting reserve lock"); +                        goto out; +                }                  ret = pl_setlk (this, pl_inode, reqlock,                                  can_block); diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c new file mode 100644 index 00000000000..c36484c46cc --- /dev/null +++ b/xlators/features/locks/src/reservelk.c @@ -0,0 +1,450 @@ +/* +  Copyright (c) 2006, 2007, 2008 Gluster, Inc. <http://www.gluster.com> +  This file is part of GlusterFS. 
+ +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +void +__delete_reserve_lock (posix_lock_t *lock) +{ +	list_del (&lock->list); +} + +void +__destroy_reserve_lock (posix_lock_t *lock) +{ +	FREE (lock); +} + +/* Return true if the two reservelks have exactly same lock boundaries */ +int +reservelks_equal (posix_lock_t *l1, posix_lock_t *l2) +{ +	if ((l1->fl_start == l2->fl_start) && +	    (l1->fl_end == l2->fl_end)) +		return 1; + +	return 0; +} + +/* Determine if lock is grantable or not */ +static posix_lock_t * +__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) +{ +        xlator_t     *this     = NULL; +	posix_lock_t *l        = NULL; +        posix_lock_t *ret_lock = NULL; + +        this = THIS; + +	if (list_empty (&pl_inode->reservelk_list)) { +                gf_log (this->name, GF_LOG_TRACE, +                        "No reservelks in list"); +		goto out; +        } +	list_for_each_entry (l, &pl_inode->reservelk_list, list){ +		if (reservelks_equal (lock, l)) { +                        ret_lock = l; +                        break; +		} +	} +out: +	return ret_lock; +} + +static int +__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2) +{ +        return ((l1->owner == l2->owner)); + +} + +static posix_lock_t * +__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock) +{ +        posix_lock_t *l = NULL; + +        if (list_empty (&pl_inode->reservelk_list)) { +            gf_log ("posix-locks", GF_LOG_TRACE, +                    "reservelk list empty"); +            return NULL; +        } + +        list_for_each_entry (l, &pl_inode->reservelk_list, list) { +                if (reservelks_equal (l, lock)) { +                        gf_log ("posix-locks", GF_LOG_TRACE, +                                "equal reservelk found"); +                        break; +                } +        } + +        return l; +} + +static int +__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode, +                      posix_lock_t *lock) +{ +        posix_lock_t *conf = NULL; +        int ret = 0; + +        conf = __matching_reservelk (pl_inode, lock); +        if (conf) { +                gf_log (this->name, GF_LOG_TRACE, +                        "Matching reservelk found"); +                if (__same_owner_reservelk (lock, conf)) { +                        list_del_init (&conf->list); +                        gf_log (this->name, GF_LOG_TRACE, +                                "Removing the matching reservelk for setlk to progress"); +                        FREE (conf); +                        ret = 0; +                } else { +                        gf_log (this->name, GF_LOG_TRACE, +                          
      "Conflicting reservelk found"); +                        ret = 1; +                } + +        } +        return ret; + +} + +int +pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, +                     posix_lock_t *lock, int can_block) +{ +        int ret = 0; + +        pthread_mutex_lock (&pl_inode->mutex); +        { +                if (__reservelk_conflict (this, pl_inode, lock)) { +                        gf_log (this->name, GF_LOG_TRACE, +                                "Found conflicting reservelk. Blocking until reservelk is unlocked."); +                        lock->blocked = can_block; +                        list_add_tail (&lock->list, &pl_inode->blocked_calls); +                        ret = -1; +                        goto unlock; +                } + +                gf_log (this->name, GF_LOG_TRACE, +                        "no conflicting reservelk found. Call continuing"); +                ret = 0; + +        } +unlock: +        pthread_mutex_unlock (&pl_inode->mutex); + +        return ret; + +} + + +/* Determines if lock can be granted and adds the lock. If the lock + * is blocking, adds it to the blocked_reservelks. + */ +static int +__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, +                  int can_block) +{ +	posix_lock_t *conf = NULL; +	int ret = -EINVAL; + +	conf = __reservelk_grantable (pl_inode, lock); +	if (conf){ +		ret = -EAGAIN; +		if (can_block == 0) +			goto out; + +		list_add_tail (&lock->list, &pl_inode->blocked_reservelks); + +                gf_log (this->name, GF_LOG_TRACE, +                        "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked", +                        lock->fl_type == F_UNLCK ? "Unlock" : "Lock", +                        lock->client_pid, +                        lock->owner, +                        lock->user_flock.l_start, +                        lock->user_flock.l_len); + + +		goto out; +	} + +	list_add (&lock->list, &pl_inode->reservelk_list); + +	ret = 0; + +out: +	return ret; +} + +static posix_lock_t * +find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode) +{ +	posix_lock_t *l = NULL; +	list_for_each_entry (l, &pl_inode->reservelk_list, list) { +		if (reservelks_equal (l, lock)) +			return l; +	} +	return NULL; +} + +/* Set F_UNLCK removes a lock which has the exact same lock boundaries + * as the UNLCK lock specifies. 
If such a lock is not found, returns invalid + */ +static posix_lock_t * +__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode) +{ + +	posix_lock_t *conf = NULL; + +	conf = find_matching_reservelk (lock, pl_inode); +	if (!conf) { +                gf_log (this->name, GF_LOG_DEBUG, +                        " Matching lock not found for unlock"); +		goto out; +        } +	__delete_reserve_lock (conf); +        gf_log (this->name, GF_LOG_DEBUG, +                " Matching lock found for unlock"); + +out: +	return conf; + + +} + +static void +__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode, +                               struct list_head *granted) +{ +	int	      bl_ret = 0; +	posix_lock_t *bl = NULL; +	posix_lock_t *tmp = NULL; + +        struct list_head blocked_list; + +        INIT_LIST_HEAD (&blocked_list); +        list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + +	list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + +		list_del_init (&bl->list); + +		bl_ret = __lock_reservelk (this, pl_inode, bl, 1); + +		if (bl_ret == 0) { +			list_add (&bl->list, granted); +                } +        } +	return; +} + +/* Grant all reservelks blocked on lock(s) */ +void +grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode) +{ +	struct list_head granted; +	posix_lock_t *lock = NULL; +	posix_lock_t *tmp = NULL; + +	INIT_LIST_HEAD (&granted); + +        if (list_empty (&pl_inode->blocked_reservelks)) { +                gf_log (this->name, GF_LOG_TRACE, +                        "No blocked locks to be granted"); +                return; +        } + +        pthread_mutex_lock (&pl_inode->mutex); +	{ +		__grant_blocked_reserve_locks (this, pl_inode, &granted); +	} +        pthread_mutex_unlock (&pl_inode->mutex); + +	list_for_each_entry_safe (lock, tmp, &granted, list) { +                gf_log (this->name, GF_LOG_TRACE, +			"%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted", +			lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", +			lock->client_pid, +			lock->owner, +			lock->user_flock.l_start, +			lock->user_flock.l_len); + +		STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock); +	} + +} + +static void +__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode, +                            struct list_head *granted) +{ +	int	      bl_ret = 0; +	posix_lock_t *bl = NULL; +	posix_lock_t *tmp = NULL; + +        struct list_head blocked_list; + +        INIT_LIST_HEAD (&blocked_list); +        list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + +	list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + +		list_del_init (&bl->list); + +		bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked); + +		if (bl_ret == 0) { +                        list_add_tail (&bl->list, granted); +                } +        } +	return; +} + +void +grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode) +{ +	struct list_head granted; +	posix_lock_t *lock = NULL; +	posix_lock_t *tmp = NULL; +        fd_t         *fd  = NULL; + +        int can_block = 0; +        int32_t cmd = 0; +        int ret = 0; + +        if (list_empty (&pl_inode->blocked_calls)) { +                gf_log (this->name, GF_LOG_TRACE, +                        "No blocked lock calls to be granted"); +                return; +        } + +        pthread_mutex_lock (&pl_inode->mutex); +	{ +		__grant_blocked_lock_calls (this, pl_inode, &granted); +	} +        pthread_mutex_unlock (&pl_inode->mutex); + +        list_for_each_entry_safe (lock, tmp, &granted, list) { +                fd = fd_from_fdnum (lock); + +                if (lock->blocked) { +                        can_block = 1; +                        cmd = F_SETLKW; +                } +                else +                        cmd = F_SETLK; + +                lock->blocked = 0; +                ret = pl_setlk (this, pl_inode, lock, can_block); +                if (ret == -1) { +                        if (can_block) { +                                pl_trace_block (this, lock->frame, fd, NULL, +                                                cmd, &lock->user_flock, NULL); +                                continue; +                        } else { +                                gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); +                                pl_trace_out (this, lock->frame, fd, NULL, cmd, +                                              &lock->user_flock, -1, EAGAIN, NULL); +                                pl_update_refkeeper (this, fd->inode); +                                STACK_UNWIND_STRICT (lk, lock->frame, -1, EAGAIN, &lock->user_flock); +                                __destroy_lock (lock); +                        } +                } + +        } + +} + + +int +pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) +{ +        posix_lock_t *retlock = NULL; +        int ret = -1; + +        pthread_mutex_lock (&pl_inode->mutex); +	{ +		retlock = __reserve_unlock_lock (this, lock, pl_inode); +		if (!retlock) { +			gf_log (this->name, GF_LOG_DEBUG, +				"Bad Unlock issued on Inode lock"); +                        ret = -EINVAL; +                        goto out; +                } + +                gf_log (this->name, GF_LOG_TRACE, +                        "Reservelk Unlock successful"); +		__destroy_reserve_lock (retlock); +                ret = 0; +        } +out: +        pthread_mutex_unlock (&pl_inode->mutex); + +        grant_blocked_reserve_locks (this, pl_inode); +        
grant_blocked_lock_calls (this, pl_inode); + +        return ret; + +} + +int +pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, +                  int can_block) +{ +	int ret = -EINVAL; + +	pthread_mutex_lock (&pl_inode->mutex); +	{ + +			ret = __lock_reservelk (this, pl_inode, lock, can_block); +                        if (ret < 0) +				gf_log (this->name, GF_LOG_TRACE, +                                        "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK", +					lock->fl_type == F_UNLCK ? "Unlock" : "Lock", +					lock->client_pid, +                                        lock->owner, +					lock->user_flock.l_start, +					lock->user_flock.l_len); +                        else +				gf_log (this->name, GF_LOG_TRACE, +                                        "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK", +                                        lock->fl_type == F_UNLCK ? "Unlock" : "Lock", +                                        lock->client_pid, +                                        lock->owner, +                                        lock->fl_start, +                                        lock->fl_end); + +	} +	pthread_mutex_unlock (&pl_inode->mutex); +        return ret; +} diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am index 007810e9901..f2dea68d70d 100644 --- a/xlators/protocol/client/src/Makefile.am +++ b/xlators/protocol/client/src/Makefile.am @@ -9,7 +9,7 @@ client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \  	$(top_builddir)/rpc/xdr/src/libgfxdr.la  client_la_SOURCES = client.c client-helpers.c client3_1-fops.c  \ -	client-handshake.c client-callback.c +	client-handshake.c client-callback.c client-lk.c  noinst_HEADERS = client.h client-mem-types.h  AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 7511813d3cb..8b0c90ebc8c 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -343,14 +343,38 @@ unwind:  }  int +client_notify_parents_child_up (xlator_t *this) +{ +        xlator_list_t          *parent = NULL; + +        /* As fuse is not 'parent' of any translator now, triggering its +           CHILD_UP event is hacky in case client has only client protocol */ +        if (!this->parents && this->ctx && this->ctx->master) { +                /* send notify to 'ctx->master' if it exists */ +                xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP, +                               this->graph); +        } + +        parent = this->parents; +        while (parent) { +                xlator_notify (parent->xlator, GF_EVENT_CHILD_UP, +                               this); +                parent = parent->next; +        } + +        return 0; +} + +int  client_post_handshake (call_frame_t *frame, xlator_t *this)  {          clnt_conf_t            *conf = NULL;          clnt_fd_ctx_t          *tmp = NULL;          clnt_fd_ctx_t          *fdctx = NULL; -        xlator_list_t          *parent = NULL;          struct list_head        reopen_head; +        int count = 0; +          if (!this || !this->private)                  goto out; @@ -366,34 +390,33 @@ client_post_handshake (call_frame_t *frame, xlator_t *this)                          list_del_init (&fdctx->sfd_pos);                          list_add_tail (&fdctx->sfd_pos, &reopen_head); +       
                 count++;                  }          }          pthread_mutex_unlock (&conf->lock); -        list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { -                list_del_init (&fdctx->sfd_pos); +        /* Delay notifying CHILD_UP to parents +           until all locks are recovered */ +        if (count > 0) { +                gf_log (this->name, GF_LOG_TRACE, +                        "%d fds open - Delaying child_up until they are re-opened", +                        count); +                client_save_number_fds (conf, count); -                if (fdctx->is_dir) -                        protocol_client_reopendir (this, fdctx); -                else -                        protocol_client_reopen (this, fdctx); -        } +                list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { +                        list_del_init (&fdctx->sfd_pos); -        /* As fuse is not 'parent' of any translator now, triggering its -           CHILD_UP event is hacky in case client has only client protocol */ -        if (!this->parents && this->ctx && this->ctx->master) { -                /* send notify to 'ctx->master' if it exists */ -                xlator_notify (this->ctx->master, GF_EVENT_CHILD_UP, -                               this->graph); -        } +                        if (fdctx->is_dir) +                                protocol_client_reopendir (this, fdctx); +                        else +                                protocol_client_reopen (this, fdctx); +                } +        } else { +                gf_log (this->name, GF_LOG_TRACE, +                        "No open fds - notifying all parents child up"); +                client_notify_parents_child_up (this); -        parent = this->parents; -        while (parent) { -                xlator_notify (parent->xlator, GF_EVENT_CHILD_UP, -                               this); -                parent = parent->next;          } -  out:          return 0;  } diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index d8da60aa1ae..d1282d50c90 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -76,16 +76,26 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, clnt_fd_ctx_t *ctx)          ret = fd_ctx_get (file, this, &oldaddr);          if (ret >= 0) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "%s (%"PRId64"): trying duplicate remote fd set. ", -                        loc->path, loc->inode->ino); +                if (loc) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "%s (%"PRId64"): trying duplicate remote fd set. ", +                                loc->path, loc->inode->ino); +                else +                        gf_log (this->name, GF_LOG_DEBUG, +                                "%p: trying duplicate remote fd set. 
", +                                file);          }          ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);          if (ret < 0) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "%s (%"PRId64"): failed to set remote fd", -                        loc->path, loc->inode->ino); +                if (loc) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "%s (%"PRId64"): failed to set remote fd", +                                loc->path, loc->inode->ino); +                else +                        gf_log (this->name, GF_LOG_DEBUG, +                                "%p: failed to set remote fd", +                                file);          }  out:          return; diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c new file mode 100644 index 00000000000..4ad7fc2b1ec --- /dev/null +++ b/xlators/protocol/client/src/client-lk.c @@ -0,0 +1,949 @@ +/* +  Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#include "common-utils.h" +#include "xlator.h" +#include "client.h" + +static void +__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock); + +static int +client_send_recovery_lock (call_frame_t *frame, xlator_t *this, +                           client_posix_lock_t *lock); +static void +__dump_client_lock (client_posix_lock_t *lock) +{ +        xlator_t *this = NULL; + +        this = THIS; + +        gf_log (this->name, GF_LOG_TRACE, +                "{fd=%lld}" +                "{%s lk-owner:%"PRIu64" %"PRId64" - %"PRId64"}" +                "{start=%"PRId64" end=%"PRId64"}", +                (unsigned long long)lock->fd, +                lock->fl_type == F_WRLCK ? 
"Write-Lock" : "Read-Lock", +                lock->owner, +                lock->user_flock.l_start, +                lock->user_flock.l_len, +                lock->fl_start, +                lock->fl_end); +} + +static int +dump_client_locks_fd (clnt_fd_ctx_t *fdctx) +{ +        client_posix_lock_t *lock = NULL; +        int count = 0; + +        pthread_mutex_lock (&fdctx->mutex); +        { +                list_for_each_entry (lock, &fdctx->lock_list, list) { +                        __dump_client_lock (lock); +                        count++; +                } +        } +        pthread_mutex_unlock (&fdctx->mutex); + +        return count; + +} + +int +dump_client_locks (inode_t *inode) +{ +        fd_t             *fd    = NULL; +        clnt_conf_t    *conf  = NULL; +        xlator_t         *this  = NULL; +        clnt_fd_ctx_t  *fdctx = NULL; + +        int total_count = 0; +        int locks_fd_count   = 0; + +        this = THIS; +        conf = this->private; + +        LOCK (&inode->lock); +        { +                list_for_each_entry (fd, &inode->fd_list, inode_list) { +                        locks_fd_count = 0; + +                        pthread_mutex_lock (&conf->lock); +                        { +                                fdctx = this_fd_get_ctx (fd, this); +                        } +                        pthread_mutex_unlock (&conf->lock); + +                        if (fdctx) +                                locks_fd_count = dump_client_locks_fd (fdctx); + +                        total_count += locks_fd_count; +                } + +        } +        UNLOCK (&inode->lock); + +        return total_count; + +} + +static off_t +__get_lock_length (off_t start, off_t end) +{ +        if (end == LLONG_MAX) +                return 0; +        else +                return (end - start + 1); +} + +/* Add two locks */ +static client_posix_lock_t * +add_locks (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ +	client_posix_lock_t *sum = NULL; + +	sum = GF_CALLOC (1, sizeof (*sum), gf_client_mt_clnt_lock_t); +	if (!sum) +		return NULL; + +	sum->fl_start = min (l1->fl_start, l2->fl_start); +	sum->fl_end   = max (l1->fl_end, l2->fl_end); + +        sum->user_flock.l_start = sum->fl_start; +        sum->user_flock.l_len   = __get_lock_length (sum->fl_start, +                                                     sum->fl_end); + +	return sum; +} + +/* Return true if the locks have the same owner */ +static int +same_owner (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ +        return ((l1->owner == l2->owner)); +} + +/* Return true if the locks overlap, false otherwise */ +static int +locks_overlap (client_posix_lock_t *l1, client_posix_lock_t *l2) +{ +	/* +	   Note: +	   FUSE always gives us absolute offsets, so no need to worry +	   about SEEK_CUR or SEEK_END +	*/ + +	return ((l1->fl_end >= l2->fl_start) && +		(l2->fl_end >= l1->fl_start)); +} + +static void +__delete_client_lock (client_posix_lock_t *lock) +{ +	list_del_init (&lock->list); +} + +/* Destroy a posix_lock */ +static void +__destroy_client_lock (client_posix_lock_t *lock) +{ +	free (lock); +} + +/* Subtract two locks */ +struct _values { +	client_posix_lock_t *locks[3]; +}; + +/* {big} must always be contained inside {small} */ +static struct _values +subtract_locks (client_posix_lock_t *big, client_posix_lock_t *small) +{ +	struct _values v = { .locks = {0, 0, 0} }; + +	if ((big->fl_start == small->fl_start) && +	    (big->fl_end   == small->fl_end)) { +		/* both edges coincide with big */ +		
v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                        gf_client_mt_clnt_lock_t ); +		GF_ASSERT (v.locks[0]); +		memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); +		v.locks[0]->fl_type = small->fl_type; +	} +	else if ((small->fl_start > big->fl_start) && +		 (small->fl_end   < big->fl_end)) { +		/* both edges lie inside big */ +		v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                        gf_client_mt_clnt_lock_t); +		GF_ASSERT (v.locks[0]); +		v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                     gf_client_mt_clnt_lock_t); +		GF_ASSERT (v.locks[1]); +		v.locks[2] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                        gf_client_mt_clnt_lock_t); +		GF_ASSERT (v.locks[2]); + +		memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); +		v.locks[0]->fl_end = small->fl_start - 1; +                v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start, +                                                                  v.locks[0]->fl_end); + +		memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); +		memcpy (v.locks[2], big, sizeof (client_posix_lock_t)); +		v.locks[2]->fl_start = small->fl_end + 1; +                v.locks[2]->user_flock.l_start = small->fl_end + 1; +	} +	/* one edge coincides with big */ +	else if (small->fl_start == big->fl_start) { +		v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                        gf_client_mt_clnt_lock_t); +		GF_ASSERT (v.locks[0]); +		v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                        gf_client_mt_clnt_lock_t); +		GF_ASSERT (v.locks[1]); + +		memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); +		v.locks[0]->fl_start = small->fl_end + 1; +                v.locks[0]->user_flock.l_start = small->fl_end + 1; + +		memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); +	} +	else if (small->fl_end   == big->fl_end) { +		v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                        gf_client_mt_clnt_lock_t); +		GF_ASSERT (v.locks[0]); +		v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t), +                                        gf_client_mt_clnt_lock_t); +		GF_ASSERT (v.locks[1]); + +		memcpy (v.locks[0], big, sizeof (client_posix_lock_t)); +		v.locks[0]->fl_end = small->fl_start - 1; +                v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start, +                                                                  v.locks[0]->fl_end); + +		memcpy (v.locks[1], small, sizeof (client_posix_lock_t)); +	} +        else { +                gf_log ("client-protocol", GF_LOG_ERROR, +                        "Unexpected case in subtract_locks. 
Please send " +                        "a bug report to gluster-devel@nongnu.org"); +        } + +        return v; +} + +static void +__delete_unlck_locks (clnt_fd_ctx_t *fdctx) +{ +	client_posix_lock_t *l = NULL; +	client_posix_lock_t *tmp = NULL; + +	list_for_each_entry_safe (l, tmp, &fdctx->lock_list, list) { +		if (l->fl_type == F_UNLCK) { +			__delete_client_lock (l); +			__destroy_client_lock (l); +		} +	} +} + +static void +__insert_lock (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ +	list_add_tail (&lock->list, &fdctx->lock_list); + +	return; +} + +static void +__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ +        client_posix_lock_t  *conf = NULL; +        client_posix_lock_t  *t = NULL; +        client_posix_lock_t  *sum = NULL; +        int            i = 0; +        struct _values v = { .locks = {0, 0, 0} }; + +        list_for_each_entry_safe (conf, t, &fdctx->lock_list, list) { +                if (!locks_overlap (conf, lock)) +                        continue; + +                if (same_owner (conf, lock)) { +                        if (conf->fl_type == lock->fl_type) { +                                sum = add_locks (lock, conf); + +                                sum->fd         = lock->fd; + +                                __delete_client_lock (conf); +                                __destroy_client_lock (conf); + +                                __destroy_client_lock (lock); +                                __insert_and_merge (fdctx, sum); + +                                return; +                        } else { +                                sum = add_locks (lock, conf); + +                                sum->fd         = conf->fd; +                                sum->owner      = conf->owner; + +                                v = subtract_locks (sum, lock); + +                                __delete_client_lock (conf); +                                __destroy_client_lock (conf); + +                                __delete_client_lock (lock); +                                __destroy_client_lock (lock); + +                                __destroy_client_lock (sum); + +                                for (i = 0; i < 3; i++) { +                                        if (!v.locks[i]) +                                                continue; + +                                        INIT_LIST_HEAD (&v.locks[i]->list); +                                        __insert_and_merge (fdctx, +                                                            v.locks[i]); +                                } + +                                __delete_unlck_locks (fdctx); +                                return; +                        } +                } + +                if (lock->fl_type == F_UNLCK) { +                        continue; +                } + +                if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { +                        __insert_lock (fdctx, lock); +                        return; +                } +        } + +        /* no conflicts, so just insert */ +        if (lock->fl_type != F_UNLCK) { +                __insert_lock (fdctx, lock); +        } else { +                __destroy_client_lock (lock); +        } +} + +static void +client_setlk (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) +{ +        pthread_mutex_lock (&fdctx->mutex); +        { +                __insert_and_merge (fdctx, lock); +        } +        pthread_mutex_unlock (&fdctx->mutex); + +        return; +} + 
+static void +destroy_client_lock (client_posix_lock_t *lock) +{ +        GF_FREE (lock); +} + +int32_t +delete_granted_locks_owner (fd_t *fd, uint64_t owner) +{ +        clnt_fd_ctx_t     *fdctx = NULL; +        client_posix_lock_t *lock  = NULL; +        client_posix_lock_t *tmp   = NULL; +        xlator_t            *this  = NULL; + +        struct list_head delete_list; +        int ret   = 0; +        int count = 0; + +        INIT_LIST_HEAD (&delete_list); +        this = THIS; +        fdctx = this_fd_get_ctx (fd, this); +        if (!fdctx) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "fdctx not valid"); +                ret = -1; +                goto out; +        } + +        pthread_mutex_lock (&fdctx->mutex); +        { +                list_for_each_entry_safe (lock, tmp, &fdctx->lock_list, list) { +                        if (lock->owner == owner) { +                                list_del_init (&lock->list); +                                list_add_tail (&lock->list, &delete_list); +                                count++; +                        } +                } +        } +        pthread_mutex_unlock (&fdctx->mutex); + +        list_for_each_entry_safe (lock, tmp, &delete_list, list) { +                list_del_init (&lock->list); +                destroy_client_lock (lock); +        } + +/* FIXME: Need to actually print the locks instead of count */ +        gf_log (this->name, GF_LOG_DEBUG, +                "Number of locks cleared=%d", count); + +out: +        return ret; +} + +int32_t +delete_granted_locks_fd (clnt_fd_ctx_t *fdctx) +{ +        client_posix_lock_t *lock = NULL; +        client_posix_lock_t *tmp = NULL; +        xlator_t            *this = NULL; + +        struct list_head delete_list; +        int ret   = 0; +        int count = 0; + +        INIT_LIST_HEAD (&delete_list); +        this = THIS; + +        pthread_mutex_lock (&fdctx->mutex); +        { +                list_splice_init (&fdctx->lock_list, &delete_list); +        } +        pthread_mutex_unlock (&fdctx->mutex); + +        list_for_each_entry_safe (lock, tmp, &delete_list, list) { +                list_del_init (&lock->list); +                count++; +                destroy_client_lock (lock); +        } + +        /* FIXME: Need to actually print the locks instead of count */ +        gf_log (this->name, GF_LOG_DEBUG, +                "Number of locks cleared=%d", count); + +        return  ret; +} + +static void +client_mark_bad_fd (fd_t *fd, clnt_fd_ctx_t *fdctx) +{ +        xlator_t *this = NULL; + +        this = THIS; +        if (fdctx) +                fdctx->remote_fd = -1; +        this_fd_set_ctx (fd, this, NULL, fdctx); +} + +int32_t +client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd) +{ +        int ret = 0; + +        if (cmd == F_GETLK || cmd == F_GETLK64) +                *gf_cmd = GF_LK_GETLK; +        else if (cmd == F_SETLK || cmd == F_SETLK64) +                *gf_cmd = GF_LK_SETLK; +        else if (cmd == F_SETLKW || cmd == F_SETLKW64) +                *gf_cmd = GF_LK_SETLKW; +        else if (cmd == F_RESLK_LCK) +                *gf_cmd = GF_LK_RESLK_LCK; +        else if (cmd == F_RESLK_LCKW) +                *gf_cmd = GF_LK_RESLK_LCKW; +        else if (cmd == F_RESLK_UNLCK) +                *gf_cmd = GF_LK_RESLK_UNLCK; +        else +                ret = -1; + +        return ret; + +} + +static client_posix_lock_t * +new_client_lock (struct flock *flock, uint64_t owner, +                 int32_t cmd, fd_t *fd) +{ +    
    client_posix_lock_t *new_lock = NULL; +        xlator_t            *this = NULL; + + +        this = THIS; +        new_lock = GF_CALLOC (1, sizeof (*new_lock), +                              gf_client_mt_clnt_lock_t); +        if (!new_lock) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory"); +                goto out; +        } + +        INIT_LIST_HEAD (&new_lock->list); +        new_lock->fd = fd; +        memcpy (&new_lock->user_flock, flock, sizeof (struct flock)); + +        new_lock->fl_type  = flock->l_type; +        new_lock->fl_start = flock->l_start; + +	if (flock->l_len == 0) +		new_lock->fl_end = LLONG_MAX; +	else +		new_lock->fl_end = flock->l_start + flock->l_len - 1; + +        new_lock->owner = owner; +        new_lock->cmd = cmd; /* Not really useful */ + + +out: +        return new_lock; +} + +void +client_save_number_fds (clnt_conf_t *conf, int count) +{ +        LOCK (&conf->rec_lock); +        { +                conf->reopen_fd_count = count; +        } +        UNLOCK (&conf->rec_lock); +} + +int +client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner, +                              int32_t cmd) +{ +        clnt_fd_ctx_t       *fdctx = NULL; +        xlator_t            *this  = NULL; +        client_posix_lock_t *lock  = NULL; +        clnt_conf_t         *conf  = NULL; + +        int ret = 0; + +        this = THIS; +        conf = this->private; + +        pthread_mutex_lock (&conf->lock); +        { +                fdctx = this_fd_get_ctx (fd, this); +        } +        pthread_mutex_unlock (&conf->lock); + +        if (!fdctx) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "failed to get fd context. Marking as bad fd."); +                ret = -EBADFD; +                goto out; +        } + +        lock = new_client_lock (flock, owner, cmd, fd); +        if (!lock) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory"); +                ret = -ENOMEM; +                goto out; +        } + +        client_setlk (fdctx, lock); + +out: +        return ret; + +} + +static int +construct_reserve_unlock (struct flock *lock, call_frame_t *frame, +                          client_posix_lock_t *client_lock) +{ +        GF_ASSERT (lock); +        GF_ASSERT (frame); +        GF_ASSERT (frame->root->lk_owner); + +        lock->l_type = F_UNLCK; +        lock->l_start = 0; +        lock->l_whence = SEEK_SET; +        lock->l_len = 0; /* Whole file */ +        lock->l_pid = (uint64_t)frame->root; + +        frame->root->lk_owner = client_lock->owner; + +        return 0; +} + +static int +construct_reserve_lock (client_posix_lock_t *client_lock, call_frame_t *frame, +                        struct flock *lock) +{ +        GF_ASSERT (client_lock); + +        memcpy (lock, &(client_lock->user_flock), sizeof (struct flock)); + +        frame->root->lk_owner = client_lock->owner; + +        return 0; +} + +uint64_t +decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf) +{ +        uint64_t fd_count = 0; + +        LOCK (&conf->rec_lock); +        { +                fd_count = --(conf->reopen_fd_count); +        } +        UNLOCK (&conf->rec_lock); + +        if (fd_count == 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "All locks healed on the last fd - notifying CHILDUP"); +                client_notify_parents_child_up (this); +        } + +        return fd_count; +} + +int32_t 
+client_remove_reserve_lock_cbk (call_frame_t *frame, +                                void *cookie, +                                xlator_t *this, +                                int32_t op_ret, +                                int32_t op_errno, +                                struct flock *lock) +{ +        clnt_local_t *local = NULL; +        clnt_conf_t  *conf  = NULL; + +        uint64_t fd_count = 0; + +        local = frame->local; +        conf  = this->private; + +        if (op_ret < 0) { +                /* TODO: critical error describing recovery command +                   and blanket on ops on fd */ +                gf_log (this->name, GF_LOG_CRITICAL, +                        "Lock recovery failed with error msg=%s", +                        strerror(op_errno)); +                goto cleanup; +        } + +        gf_log (this->name, GF_LOG_DEBUG, +                "Removing reserve lock was successful."); + +cleanup: +        frame->local = NULL; +        client_mark_bad_fd (local->client_lock->fd, +                            local->fdctx); +        destroy_client_lock (local->client_lock); +        client_local_wipe (local); +        STACK_DESTROY (frame->root); +        fd_count = decrement_reopen_fd_count (this, conf); +        gf_log (this->name, GF_LOG_DEBUG, +                "Need to attempt lock recovery on %lld open fds", +                (unsigned long long) fd_count); +	return 0; +} + +static void +client_remove_reserve_lock (xlator_t *this, call_frame_t *frame, +                            client_posix_lock_t *lock) +{ +        struct flock unlock; +        clnt_local_t *local = NULL; + +        local = frame->local; +        construct_reserve_unlock (&unlock, frame, lock); + +        STACK_WIND (frame, client_remove_reserve_lock_cbk, +                    this, this->fops->lk, +                    lock->fd, F_RESLK_UNLCK, &unlock); +} + +static client_posix_lock_t * +get_next_recovery_lock (xlator_t *this, clnt_local_t *local) +{ +        client_posix_lock_t *lock = NULL; + +        pthread_mutex_lock (&local->mutex); +        { +                if (list_empty (&local->lock_list)) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "lock-list empty"); +                        goto unlock; +                } + +                lock = list_entry ((local->lock_list).next, typeof (*lock), list); +                list_del_init (&lock->list); +        } +unlock: +        pthread_mutex_unlock (&local->mutex); + +        return lock; + +} + +int32_t +client_reserve_lock_cbk (call_frame_t *frame, +		void *cookie, +		xlator_t *this, +		int32_t op_ret, +		int32_t op_errno, +		struct flock *lock) +{ + +        clnt_local_t *local = NULL; +        clnt_conf_t  *conf  = NULL; + +        uint64_t fd_count = 0; + +        local = frame->local; +        conf  = this->private; + +        /* Got the reserve lock. Check if lock is grantable and proceed +           with the real lk call */ + +        if (op_ret >= 0) { +                /* Lock is grantable if flock reflects a successful getlk() call*/ +                if (lock->l_type == F_UNLCK && lock->l_pid) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "Got the reservelk, but the lock is not grantable. 
"); +                        client_remove_reserve_lock (this, frame, local->client_lock); +                        goto out; +                } + +                gf_log (this->name, GF_LOG_DEBUG, +                        "Reserve Lock succeeded"); +                client_send_recovery_lock (frame, this, local->client_lock); +                goto out; +        } + +        /* Somebody else has a reserve lk. Lock conflict detected. +           Mark fd as bad */ + +        gf_log (this->name, GF_LOG_DEBUG, +                "Reservelk OP failed. Aborting lock recovery and marking bad fd"); + +        client_mark_bad_fd (local->client_lock->fd, +                            local->fdctx); +        destroy_client_lock (local->client_lock); +        frame->local = NULL; +        client_local_wipe (local); +        STACK_DESTROY (frame->root); +        fd_count = decrement_reopen_fd_count (this, conf); +        gf_log (this->name, GF_LOG_DEBUG, +                "Need to attempt lock recovery on %lld open fds", +                (unsigned long long) fd_count); + +out: +	return 0; +} + +int32_t +client_recovery_lock_cbk (call_frame_t *frame, +                          void *cookie, +                          xlator_t *this, +                          int32_t op_ret, +                          int32_t op_errno, +                          struct flock *lock) +{ +        clnt_local_t *local = NULL; +        clnt_fd_ctx_t *fdctx = NULL; +        clnt_conf_t   *conf  = NULL; +        client_posix_lock_t *next_lock = NULL; + +        struct flock reserve_flock; +        uint64_t fd_count = 0; + +        local = frame->local; +        conf  = this->private; + +        if (op_ret < 0) { +                /* TODO: critical error describing recovery command +                   and blanket on ops on fd */ +                gf_log (this->name, GF_LOG_CRITICAL, +                        "Lock recovery failed with error msg=%s", +                        strerror(op_errno)); + +                client_mark_bad_fd (local->client_lock->fd, +                                    local->fdctx); +                goto cleanup; + +                /* Lock recovered. 
Continue with reserve lock for next lock */ +        } else { +                gf_log (this->name, GF_LOG_DEBUG, +                        "lock recovered successfully -  Continuing with next lock."); + +                next_lock = get_next_recovery_lock (this, local); +                if (!next_lock) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "All locks recovered on fd"); +                        goto cleanup; +                } + +                construct_reserve_lock (next_lock, frame, &reserve_flock); +                local->fdctx       = fdctx; +                local->client_lock = next_lock; + +                STACK_WIND (frame, client_reserve_lock_cbk, +                            this, this->fops->lk, +                            next_lock->fd, F_RESLK_LCK, &reserve_flock); +                goto out; + +        } + +cleanup: +        frame->local = NULL; +        client_local_wipe (local); +        if (local->client_lock) +                destroy_client_lock (local->client_lock); +        STACK_DESTROY (frame->root); +        fd_count = decrement_reopen_fd_count (this, conf); +        gf_log (this->name, GF_LOG_DEBUG, +                "Need to attempt lock recovery on %lld open fds", +                (unsigned long long) fd_count); + +out: +	return 0; +} + +static int +client_send_recovery_lock (call_frame_t *frame, xlator_t *this, +                           client_posix_lock_t *lock) +{ + +        frame->root->lk_owner = lock->owner; + +        /* Send all locks as F_SETLK to prevent the frame +           from blocking if there is a conflict */ + +        STACK_WIND (frame, client_recovery_lock_cbk, +                    this, this->fops->lk, +                    lock->fd, F_SETLK, +                    &(lock->user_flock)); + +        return 0; +} + +static int +client_lockrec_init (clnt_fd_ctx_t *fdctx, clnt_local_t *local) +{ + +        INIT_LIST_HEAD (&local->lock_list); +        pthread_mutex_init (&local->mutex, NULL); + +        pthread_mutex_lock (&fdctx->mutex); +        { +                list_splice_init (&fdctx->lock_list, &local->lock_list); +        } +        pthread_mutex_unlock (&fdctx->mutex); + +        return 0; +} + +int +client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx) +{ +        call_frame_t        *frame = NULL; +        clnt_local_t      *local = NULL; +        client_posix_lock_t *lock  = NULL; + +        struct flock reserve_flock; +        int ret = 0; + +        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); +        if (!local) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory"); +                ret = -ENOMEM; +                goto out; +        } + +        client_lockrec_init (fdctx, local); + +        lock = get_next_recovery_lock (this, local); +        if (!lock) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "No locks on fd"); +                ret = -1; +                goto out; +        } + +        frame = create_frame (this, this->ctx->pool); +        if (!frame) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory"); +                ret = -1; +                goto out; +        } + +        construct_reserve_lock (lock, frame, &reserve_flock); + +        local->fdctx       = fdctx; +        local->client_lock = lock; + +        STACK_WIND (frame, client_reserve_lock_cbk, +                    this, this->fops->lk, +                    
lock->fd, F_RESLK_LCK, &reserve_flock); + +out: +        return ret; + + +} + +int32_t +client_dump_locks (char *name, inode_t *inode, +                   dict_t *dict) +{ +        int     ret = 0; +        char    dict_string[256]; + +        ret = dump_client_locks (inode); +        snprintf (dict_string, 256, "%d locks dumped in log file", ret); + +        dict = dict_new (); +        if (!dict) { +                gf_log (THIS->name, GF_LOG_DEBUG, +                        "Out of memory"); +                ret = -1; +                goto out; +        } + +        ret = dict_set_str (dict, "trusted.glusterfs.clientlk-dump", dict_string); +        if (ret) { +                gf_log (THIS->name, GF_LOG_DEBUG, +                        "Could not set dict with %s", CLIENT_DUMP_LOCKS); +                goto out; +        } + +out: +        return ret; +} + +int32_t +is_client_dump_locks_cmd (char *name) +{ +        int ret = 0; + +        if (strcmp (name, CLIENT_DUMP_LOCKS) == 0) +                ret = 1; + +        return ret; +} diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h index 638e537d116..50015b18fd1 100644 --- a/xlators/protocol/client/src/client-mem-types.h +++ b/xlators/protocol/client/src/client-mem-types.h @@ -28,6 +28,7 @@ enum gf_client_mem_types_ {          gf_client_mt_clnt_local_t,          gf_client_mt_clnt_req_buf_t,          gf_client_mt_clnt_fdctx_t, +        gf_client_mt_clnt_lock_t,          gf_client_mt_end,  };  #endif /* __CLIENT_MEM_TYPES_H__ */ diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 726a9dad792..b703b88f4b7 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -1865,6 +1865,8 @@ init (xlator_t *this)          pthread_mutex_init (&conf->lock, NULL);          INIT_LIST_HEAD (&conf->saved_fds); +        LOCK_INIT (&conf->rec_lock); +          this->private = conf;          /* If it returns -1, then its a failure, if it returns +1 we need diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 84940de5f84..a31873a027b 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -33,7 +33,7 @@  /* FIXME: Needs to be defined in a common file */  #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"  #define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect" - +#define CLIENT_DUMP_LOCKS "trusted.glusterfs.clientlk-dump"  struct clnt_options {          char *remote_subvolume;          int   ping_timeout; @@ -54,6 +54,10 @@ typedef struct clnt_conf {          rpc_clnt_prog_t       *mgmt;          rpc_clnt_prog_t       *handshake;          rpc_clnt_prog_t       *dump; + +        uint64_t               reopen_fd_count; /* Count of fds reopened after a +                                                   connection is established */ +        gf_lock_t              rec_lock;  } clnt_conf_t;  typedef struct _client_fd_ctx { @@ -68,8 +72,24 @@ typedef struct _client_fd_ctx {          char              released;          int32_t           flags;          int32_t           wbflags; + +        pthread_mutex_t   mutex; +        struct list_head  lock_list;     /* List of all granted locks on this fd */  } clnt_fd_ctx_t; +typedef struct _client_posix_lock { +        fd_t              *fd;            /* The fd on which the lk operation was made */ + +        struct flock       user_flock;    /* the flock supplied by the user */ +        
off_t              fl_start; +        off_t              fl_end; +        short              fl_type; +        int32_t            cmd;           /* the cmd for the lock call */ +        uint64_t           owner;         /* lock owner from fuse */ + +        struct list_head   list;          /* reference used to add to the fdctx list of locks */ +} client_posix_lock_t; +  typedef struct client_local {          loc_t              loc;          loc_t              loc2; @@ -79,6 +99,12 @@ typedef struct client_local {          uint32_t           wbflags;          struct iobref     *iobref;          fop_cbk_fn_t       op; + +        client_posix_lock_t *client_lock; +        uint64_t           owner; +        int32_t            cmd; +        struct list_head   lock_list; +        pthread_mutex_t    mutex;  } clnt_local_t;  typedef struct client_args { @@ -138,6 +164,17 @@ int unserialize_rsp_direntp (struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries  int clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp);  int clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp); - - +int client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx); +int32_t delete_granted_locks_owner (fd_t *fd, uint64_t owner); +int client_add_lock_for_recovery (fd_t *fd, struct flock *flock, uint64_t owner, +                                  int32_t cmd); +uint64_t decrement_reopen_fd_count (xlator_t *this, clnt_conf_t *conf); +int32_t delete_granted_locks_fd (clnt_fd_ctx_t *fdctx); +int32_t client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd); +void client_save_number_fds (clnt_conf_t *conf, int count); +int dump_client_locks (inode_t *inode); +int client_notify_parents_child_up (xlator_t *this); +int32_t is_client_dump_locks_cmd (char *name); +int32_t client_dump_locks (char *name, inode_t *inode, +                           dict_t *dict);  #endif /* !_CLIENT_H */ diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 69f2646978d..c3add8fd3ba 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -313,6 +313,7 @@ client3_1_open_cbk (struct rpc_req *req, struct iovec *iov, int count,                  fdctx->wbflags   = local->wbflags;                  INIT_LIST_HEAD (&fdctx->sfd_pos); +                INIT_LIST_HEAD (&fdctx->lock_list);                  this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -605,10 +606,14 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count,                       void *myframe)  {          call_frame_t    *frame      = NULL; +        clnt_local_t  *local      = NULL; +        xlator_t        *this       = NULL;          gf_common_rsp    rsp        = {0,};          int              ret        = 0;          frame = myframe; +        this  = THIS; +        local = frame->local;          if (-1 == req->rpc_status) {                  rsp.op_ret   = -1; @@ -623,6 +628,18 @@ client3_1_flush_cbk (struct rpc_req *req, struct iovec *iov, int count,                  goto out;          } +        if (rsp.op_ret >= 0) { +                /* Delete all saved locks of the owner issuing flush */ +                gf_log (this->name, GF_LOG_DEBUG, +                        "Attempting to delete locks of owner=%llu", +                        (long long unsigned) local->owner); +                delete_granted_locks_owner (local->fd, local->owner); +        } + +        frame->local = NULL; +        if (local) +                client_local_wipe (local); +  out:          
STACK_UNWIND_STRICT (flush, frame, rsp.op_ret,                               gf_error_to_errno (rsp.op_errno)); @@ -1442,6 +1459,7 @@ client3_1_create_cbk (struct rpc_req *req, struct iovec *iov, int count,                  fdctx->flags     = local->flags;                  INIT_LIST_HEAD (&fdctx->sfd_pos); +                INIT_LIST_HEAD (&fdctx->lock_list);                  this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -1506,12 +1524,14 @@ int  client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count,                    void *myframe)  { -        call_frame_t  *frame = NULL; -        struct flock   lock = {0,}; -        gfs3_lk_rsp    rsp        = {0,}; -        int            ret        = 0; +        call_frame_t    *frame      = NULL; +        clnt_local_t  *local      = NULL; +        struct flock     lock       = {0,}; +        gfs3_lk_rsp      rsp        = {0,}; +        int              ret        = 0;          frame = myframe; +        local = frame->local;          if (-1 == req->rpc_status) {                  rsp.op_ret   = -1; @@ -1531,6 +1551,20 @@ client3_1_lk_cbk (struct rpc_req *req, struct iovec *iov, int count,                  gf_flock_to_flock (&rsp.flock, &lock);          } +        /* Save the lock to the client lock cache to be able +           to recover in the case of server reboot.*/ +        if (local->cmd == F_SETLK || local->cmd == F_SETLKW) { +                ret = client_add_lock_for_recovery (local->fd, &lock, +                                                    local->owner, local->cmd); +                if (ret < 0) { +                        rsp.op_ret = -1; +                        rsp.op_errno = -ret; +                } +        } + +        frame->local = NULL; +        client_local_wipe (local); +  out:          STACK_UNWIND_STRICT (lk, frame, rsp.op_ret,                               gf_error_to_errno (rsp.op_errno), &lock); @@ -1777,6 +1811,7 @@ client3_1_opendir_cbk (struct rpc_req *req, struct iovec *iov, int count,                  fdctx->is_dir    = 1;                  INIT_LIST_HEAD (&fdctx->sfd_pos); +                INIT_LIST_HEAD (&fdctx->lock_list);                  this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); @@ -2014,12 +2049,14 @@ int  client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,                        void           *myframe)  { -        int32_t        ret = -1; -        gfs3_open_rsp  rsp = {0,}; -        clnt_local_t  *local = NULL; -        clnt_conf_t   *conf = NULL; -        clnt_fd_ctx_t *fdctx = NULL; -        call_frame_t  *frame = NULL; +        int32_t        ret                   = -1; +        gfs3_open_rsp  rsp                   = {0,}; +        int            attempt_lock_recovery = _gf_false; +        uint64_t       fd_count              = 0; +        clnt_local_t  *local                 = NULL; +        clnt_conf_t   *conf                  = NULL; +        clnt_fd_ctx_t *fdctx                 = NULL; +        call_frame_t  *frame                 = NULL;          frame = myframe;          local = frame->local; @@ -2052,6 +2089,7 @@ client3_1_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,                          if (!fdctx->released) {                                  list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); +                                attempt_lock_recovery = _gf_true;                                  fdctx = NULL;                          }                  } @@ -2060,6 +2098,20 @@ client3_1_reopen_cbk (struct rpc_req *req, struct 
iovec *iov, int count,            }          } +        if (attempt_lock_recovery) { +                ret = client_attempt_lock_recovery (frame->this, local->fdctx); +                if (ret < 0) +                        gf_log (frame->this->name, GF_LOG_DEBUG, +                                "No locks on fd to recover"); +                else { +                        fd_count = decrement_reopen_fd_count (frame->this, conf); +                        gf_log (frame->this->name, GF_LOG_DEBUG, +                                "Need to attempt lock recovery on %lld open fds", +                                (unsigned long long) fd_count); + +                } +        } +  out:          if (fdctx)                  client_fdctx_destroy (frame->this, fdctx); @@ -2380,6 +2432,9 @@ client3_1_release (call_frame_t *frame, xlator_t *this,          if (remote_fd != -1) {                  req.fd = remote_fd;                  req.gfs_id = GFS3_OP_RELEASE; + +                delete_granted_locks_fd (fdctx); +                  ret = client_submit_request (this, &req, frame, conf->fops,                                               GFS3_OP_RELEASE,                                               client3_1_release_cbk, NULL, @@ -3468,7 +3523,6 @@ unwind:  } -  int32_t  client3_1_flush (call_frame_t *frame, xlator_t *this,                   void *data) @@ -3477,8 +3531,9 @@ client3_1_flush (call_frame_t *frame, xlator_t *this,          gfs3_flush_req  req      = {0,};          clnt_fd_ctx_t  *fdctx    = NULL;          clnt_conf_t    *conf     = NULL; +        clnt_local_t *local    = NULL;          int             op_errno = ESTALE; -        int             ret        = 0; +        int             ret      = 0;          if (!frame || !this || !data)                  goto unwind; @@ -3507,6 +3562,21 @@ client3_1_flush (call_frame_t *frame, xlator_t *this,                  goto unwind;          } +        conf = this->private; + +        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); +        if (!local) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of Memory"); +                STACK_UNWIND (frame, -1, ENOMEM); +                return 0; + +        } + +        local->fd = fd_ref (args->fd); +        local->owner = frame->root->lk_owner; +        frame->local = local; +          req.fd = fdctx->remote_fd;          req.gfs_id = GFS3_OP_FLUSH; @@ -4012,16 +4082,23 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this,          clnt_conf_t       *conf     = NULL;          clnt_args_t       *args     = NULL;          gfs3_getxattr_req  req      = {0,}; +        dict_t            *dict     = NULL;          int                ret      = 0; +        int32_t            op_ret   = 0;          int                op_errno = ESTALE; -        if (!frame || !this || !data) +        if (!frame || !this || !data) { +                op_ret   = -1; +                op_errno = 0;                  goto unwind; - +        }          args = data; -        if (!(args->loc && args->loc->inode)) +        if (!(args->loc && args->loc->inode)) { +                op_ret   = -1; +                op_errno = EINVAL;                  goto unwind; +        }          memcpy (req.gfid,  args->loc->inode->gfid, 16);          req.namelen = 1; /* Use it as a flag */ @@ -4035,19 +4112,42 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this,          conf = this->private; +        if (args && args->name) { +                if (is_client_dump_locks_cmd ((char *)args->name)) { +   
                     ret = client_dump_locks ((char *)args->name, +                                                 args->loc->inode, +                                                 dict); +                        if (ret) { +                                gf_log (this->name, GF_LOG_DEBUG, +                                        "Client dump locks failed"); +                                op_ret = -1; +                                op_errno = EINVAL; +                        } + +                        GF_ASSERT (dict); +                        op_ret = 0; +                        op_errno = 0; +                        goto unwind; +                } +        } +          ret = client_submit_request (this, &req, frame, conf->fops,                                       GFS3_OP_GETXATTR,                                       client3_1_getxattr_cbk, NULL,                                       xdr_from_getxattr_req, NULL, 0,                                       NULL, 0, NULL);          if (ret) { +                op_ret   = -1;                  op_errno = ENOTCONN;                  goto unwind;          }          return 0;  unwind: -        STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL); +        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, NULL); +        if (dict) +                dict_unref (dict); +          return 0;  } @@ -4242,20 +4342,28 @@ int32_t  client3_1_lk (call_frame_t *frame, xlator_t *this,                void *data)  { -        clnt_args_t   *args     = NULL; -        gfs3_lk_req    req      = {0,}; -        int32_t        gf_cmd   = 0; -        int32_t        gf_type  = 0; -        clnt_fd_ctx_t *fdctx    = NULL; -        clnt_conf_t   *conf     = NULL; -        int            op_errno = ESTALE; -        int           ret        = 0; +        clnt_args_t     *args       = NULL; +        gfs3_lk_req      req        = {0,}; +        int32_t          gf_cmd     = 0; +        int32_t          gf_type    = 0; +        clnt_fd_ctx_t   *fdctx      = NULL; +        clnt_local_t    *local      = NULL; +        clnt_conf_t     *conf       = NULL; +        int              op_errno   = ESTALE; +        int              ret        = 0;          if (!frame || !this || !data)                  goto unwind;          args = data;          conf = this->private; +        local = GF_CALLOC (1, sizeof (*local), gf_client_mt_clnt_local_t); +        if (!local) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory"); +                op_errno = ENOMEM; +                goto unwind; +        }          pthread_mutex_lock (&conf->lock);          { @@ -4278,16 +4386,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this,                  goto unwind;          } -        if (args->cmd == F_GETLK || args->cmd == F_GETLK64) -                gf_cmd = GF_LK_GETLK; -        else if (args->cmd == F_SETLK || args->cmd == F_SETLK64) -                gf_cmd = GF_LK_SETLK; -        else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64) -                gf_cmd = GF_LK_SETLKW; -        else { +        ret = client_cmd_to_gf_cmd (args->cmd, &gf_cmd); +        if (ret) { +                op_errno = EINVAL;                  gf_log (this->name, GF_LOG_DEBUG,                          "Unknown cmd (%d)!", gf_cmd); -                goto unwind;          }          switch (args->flock->l_type) { @@ -4302,6 +4405,11 @@ client3_1_lk (call_frame_t *frame, xlator_t *this,                  break;          } +        local->owner = 
frame->root->lk_owner;
+        local->cmd   = args->cmd;
+        local->fd    = fd_ref (args->fd);
+        frame->local = local;
+
         req.fd    = fdctx->remote_fd;
         req.cmd   = gf_cmd;
         req.type  = gf_type;
diff --git a/xlators/protocol/legacy/client/src/client-protocol.c b/xlators/protocol/legacy/client/src/client-protocol.c
index 57cfbc73e33..ebb4e6fcf96 100644
--- a/xlators/protocol/legacy/client/src/client-protocol.c
+++ b/xlators/protocol/legacy/client/src/client-protocol.c
@@ -145,16 +145,26 @@ this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, client_fd_ctx_t *ctx)
         ret = fd_ctx_get (file, this, &oldaddr);
         if (ret >= 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "%s (%"PRId64"): trying duplicate remote fd set. ",
-                        loc->path, loc->inode->ino);
+                if (loc)
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%s (%"PRId64"): trying duplicate remote fd set. ",
+                                loc->path, loc->inode->ino);
+                else
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%p: trying duplicate remote fd set. ",
+                                file);
         }

         ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);
         if (ret < 0) {
-                gf_log (this->name, GF_LOG_DEBUG,
-                        "%s (%"PRId64"): failed to set remote fd",
-                        loc->path, loc->inode->ino);
+                if (loc)
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%s (%"PRId64"): failed to set remote fd",
+                                loc->path, loc->inode->ino);
+                else
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%p: failed to set remote fd",
+                                file);
         }
 out:
         return;
diff --git a/xlators/protocol/server/src/server3_1-fops.c b/xlators/protocol/server/src/server3_1-fops.c
index a0276ad6bb4..9c0ac90cdf9 100644
--- a/xlators/protocol/server/src/server3_1-fops.c
+++ b/xlators/protocol/server/src/server3_1-fops.c
@@ -4851,6 +4851,15 @@ server_lk (rpcsvc_request_t *req)
         case GF_LK_SETLKW:
                 state->cmd = F_SETLKW;
                 break;
+        case GF_LK_RESLK_LCK:
+                state->cmd = F_RESLK_LCK;
+                break;
+        case GF_LK_RESLK_LCKW:
+                state->cmd = F_RESLK_LCKW;
+                break;
+        case GF_LK_RESLK_UNLCK:
+                state->cmd = F_RESLK_UNLCK;
+                break;
         }
         gf_flock_to_flock (&args.flock, &state->flock);
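
Note on the recovery flow implemented above: for every lock saved at F_SETLK/F_SETLKW time, the client issues a non-blocking reserve lock (F_RESLK_LCK), replays the saved flock as a plain F_SETLK so the recovery frame can never block on a conflict, and falls back to F_RESLK_UNLCK plus marking the fd bad when the lock is no longer grantable. The fragment below is a self-contained sketch of that ordering only, assuming a hypothetical replay_lock() transport; saved_lock, recover_one() and the RESLK_* constants are illustrative stand-ins, not the xlator API, whose real logic lives in client_attempt_lock_recovery() and the client_reserve_lock_cbk()/client_recovery_lock_cbk() callbacks.

/* Minimal sketch of the reserve-lock / replay / unlock ordering.
   replay_lock() stands in for the real lk fop over RPC.            */
#include <stdio.h>
#include <fcntl.h>

/* Illustrative stand-ins for the reserve-lock commands added by the patch. */
enum recovery_cmd { RESLK_LCK = 200, RESLK_LCKW, RESLK_UNLCK };

struct saved_lock {
        struct flock       user_flock;   /* flock recorded when the lock was granted */
        unsigned long long owner;        /* lock owner recorded from the application */
        struct saved_lock *next;         /* next saved lock on the same fd           */
};

/* Hypothetical transport call: 0 on success, -1 on conflict. */
static int replay_lock (int cmd, struct flock *fl, unsigned long long owner)
{
        printf ("lk: cmd=%d type=%d owner=%llu\n", cmd, (int) fl->l_type, owner);
        return 0;                        /* pretend the server granted it */
}

static int recover_one (struct saved_lock *l)
{
        struct flock probe = l->user_flock;

        /* 1. Take the reserve lock; a conflict means another client holds
              a reservelk, so recovery for this fd is aborted.              */
        if (replay_lock (RESLK_LCK, &probe, l->owner) < 0)
                return -1;

        /* 2. Replay the saved lock as F_SETLK so recovery never blocks.    */
        if (replay_lock (F_SETLK, &l->user_flock, l->owner) < 0) {
                /* 3. No longer grantable: drop the reserve lock and let the
                      caller mark the fd bad.                                */
                replay_lock (RESLK_UNLCK, &probe, l->owner);
                return -1;
        }

        return 0;
}

int main (void)
{
        struct saved_lock lk = { .owner = 42 };

        lk.user_flock.l_type   = F_WRLCK;
        lk.user_flock.l_whence = SEEK_SET;
        lk.user_flock.l_start  = 0;
        lk.user_flock.l_len    = 0;      /* whole file */

        return recover_one (&lk) == 0 ? 0 : 1;
}

Replaying with F_SETLK rather than F_SETLKW is the design choice that keeps a conflicting lock from wedging the reopen path: a conflict simply aborts recovery for that fd, and decrement_reopen_fd_count() lets the remaining fds proceed.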

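The patch also wires a client-side lock dump into getxattr: client3_1_getxattr() recognises the virtual key CLIENT_DUMP_LOCKS ("trusted.glusterfs.clientlk-dump"), dump_client_locks() writes the granted locks to the client log, and a summary string of the form "<N> locks dumped in log file" is placed in a dict. The sketch below shows how such a key could be queried from a mount point; it assumes a Linux client and that the request reaches the client xlator unmodified, so treat it as illustrative rather than a supported interface, especially since the detailed dump ends up in the log rather than in the returned value.

/* Query the clientlk-dump virtual xattr on a file inside a GlusterFS mount. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main (int argc, char *argv[])
{
        char    value[256] = {0};
        ssize_t len        = 0;

        if (argc != 2) {
                fprintf (stderr, "usage: %s <file-on-gluster-mount>\n", argv[0]);
                return 1;
        }

        /* Ask the client xlator to dump its granted locks; the detailed
           dump goes to the client log, this only fetches the summary.   */
        len = getxattr (argv[1], "trusted.glusterfs.clientlk-dump",
                        value, sizeof (value) - 1);
        if (len < 0) {
                perror ("getxattr");
                return 1;
        }

        printf ("%.*s\n", (int) len, value);
        return 0;
}

From the shell, getfattr -n trusted.glusterfs.clientlk-dump <file> run as root would exercise the same path, assuming the FUSE mount passes the trusted.* name through to the client xlator.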