author    Avra Sengupta <asengupt@redhat.com>    2013-09-14 22:46:41 +0530
committer shishir gowda <sgowda@redhat.com>     2013-10-10 13:26:16 +0530
commit    9c447bc5e924e840e18efdbc1023470bf2b38228 (patch)
tree      c8c370375844f19894f9bc15ff389eaf14622214 /xlators/mgmt/glusterd/src/glusterd-syncop.c
parent    cd87a145ef57a166f1463629b457c0ad90f1ba5e (diff)
glusterd: Volume locks and transaction specific opinfos
This patch replaces the existing cluster-wide lock, taken on glusterds across the cluster, with volume locks that are also taken on glusterds across the cluster but are volume specific. With volume locks we can perform more than one gluster operation at the same time, as long as the operations are being performed on different volumes.

We maintain a global list of volume locks (using a dict as the list), where the key is the volume name and the value saved is the uuid of the originator glusterd. These locks are held and released per volume transaction.

To allow multiple gluster operations to occur at the same time, this patch also separates opinfos in the op-state-machine. We generate a unique transaction-id (uuid) per gluster transaction; an opinfo is then associated with this transaction id and used throughout the transaction. We maintain a run-time global list (using a dict) of transaction-ids and their respective opinfos to achieve this.

Change-Id: Iaad505a854bac8de8f83beec0357eb6cde3f7ea8
Upstream Review Url: http://review.gluster.org/5994/
BUG: 1011470
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
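A minimal, self-contained sketch of the volume-lock semantics described above: one lock per volume, tagged with the uuid of the originator glusterd, so operations on different volumes can proceed in parallel while a second operation on the same volume is rejected. This is not the actual glusterd-locks.c code (which keeps a dict_t keyed by volume name); the table, sizes, and function names here are illustrative only.

/* Hypothetical model of the per-volume lock table (illustrative names). */
#include <string.h>
#include <uuid/uuid.h>

#define MAX_VOL_LOCKS 64

struct vol_lock_entry {
        char   volname[256];   /* key: volume name                */
        uuid_t owner;          /* uuid of the originator glusterd */
        int    in_use;
};

static struct vol_lock_entry vol_lock_table[MAX_VOL_LOCKS];

/* Acquire the lock for 'volname' on behalf of 'owner'.
 * Fails if any originator already holds the lock for that volume. */
static int
volume_lock (const char *volname, uuid_t owner)
{
        int free_slot = -1;
        for (int i = 0; i < MAX_VOL_LOCKS; i++) {
                if (vol_lock_table[i].in_use &&
                    !strcmp (vol_lock_table[i].volname, volname))
                        return -1;              /* already locked */
                if (!vol_lock_table[i].in_use && free_slot < 0)
                        free_slot = i;
        }
        if (free_slot < 0)
                return -1;                      /* table full */
        strncpy (vol_lock_table[free_slot].volname, volname,
                 sizeof (vol_lock_table[free_slot].volname) - 1);
        uuid_copy (vol_lock_table[free_slot].owner, owner);
        vol_lock_table[free_slot].in_use = 1;
        return 0;
}

/* Release the lock, but only if 'owner' is the glusterd that took it. */
static int
volume_unlock (const char *volname, uuid_t owner)
{
        for (int i = 0; i < MAX_VOL_LOCKS; i++) {
                if (vol_lock_table[i].in_use &&
                    !strcmp (vol_lock_table[i].volname, volname) &&
                    !uuid_compare (vol_lock_table[i].owner, owner)) {
                        vol_lock_table[i].in_use = 0;
                        return 0;
                }
        }
        return -1;                              /* not held by this owner */
}

Under this model, transactions on two different volumes both acquire their locks and run concurrently, while a second transaction on an already-locked volume fails at the lock phase, which corresponds to the "Another transaction is in progress for %s" error path in the diff below.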
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-syncop.c')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-syncop.c | 431
1 file changed, 378 insertions(+), 53 deletions(-)
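Before the diff itself, a rough sketch of the per-transaction opinfo bookkeeping the patch relies on via glusterd_set_txn_opinfo()/glusterd_clear_txn_opinfo(): each transaction gets a fresh uuid, and its opinfo is looked up by that id instead of living in a single global. Again, this is an illustrative stand-in, not glusterd's dict-based implementation; all names below are assumptions.

/* Hypothetical model of the txn-id -> opinfo map (real glusterd keeps
 * a runtime dict keyed by the transaction uuid). */
#include <string.h>
#include <uuid/uuid.h>

#define MAX_TXNS 32

struct txn_opinfo_entry {
        uuid_t txn_id;     /* unique id generated per transaction */
        int    op;         /* the operation being performed       */
        int    in_use;
};

static struct txn_opinfo_entry txn_table[MAX_TXNS];

static int
set_txn_opinfo (uuid_t txn_id, int op)
{
        for (int i = 0; i < MAX_TXNS; i++) {
                if (!txn_table[i].in_use) {
                        uuid_copy (txn_table[i].txn_id, txn_id);
                        txn_table[i].op = op;
                        txn_table[i].in_use = 1;
                        return 0;
                }
        }
        return -1;
}

static int
get_txn_opinfo (uuid_t txn_id, int *op)
{
        for (int i = 0; i < MAX_TXNS; i++) {
                if (txn_table[i].in_use &&
                    !uuid_compare (txn_table[i].txn_id, txn_id)) {
                        *op = txn_table[i].op;
                        return 0;
                }
        }
        return -1;
}

static int
clear_txn_opinfo (uuid_t txn_id)
{
        for (int i = 0; i < MAX_TXNS; i++) {
                if (txn_table[i].in_use &&
                    !uuid_compare (txn_table[i].txn_id, txn_id)) {
                        txn_table[i].in_use = 0;
                        return 0;
                }
        }
        return -1;
}

This mirrors the flow in gd_sync_task_begin() below: uuid_generate() produces the txn_id, the id is stashed in op_ctx under "transaction_id", the opinfo is saved against that id before the lock phase, and it is cleared again in the out: path once the transaction completes.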
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index a854e0530..8b6019107 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -17,6 +17,9 @@
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+
+extern glusterd_op_info_t opinfo;
static inline void
gd_synctask_barrier_wait (struct syncargs *args, int count)
@@ -56,11 +59,25 @@ gd_collate_errors (struct syncargs *args, int op_ret, int op_errno,
}
switch (op_code){
+ case GLUSTERD_MGMT_VOLUME_LOCK:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Locking volume failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_VOLUME_UNLOCK:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Unlocking volume failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
case GLUSTERD_MGMT_CLUSTER_LOCK :
{
len = snprintf (op_err, sizeof(op_err) - 1,
- "Locking failed on %s. %s",
- peer_str, err_str);
+ "Locking cluster failed "
+ "on %s. %s", peer_str, err_str);
break;
}
case GLUSTERD_MGMT_CLUSTER_UNLOCK :
@@ -353,6 +370,181 @@ gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *args,
}
int32_t
+_gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_volume_lock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ GF_ASSERT(req);
+ GF_ASSERT(iov);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_VOLUME_LOCK, peerinfo, rsp.uuid);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_mgmt_volume_lock_cbk);
+}
+
+int
+gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid, uuid_t txn_id)
+{
+ int ret = -1;
+ gd1_mgmt_volume_lock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ uuid_copy (req.txn_id, txn_id);
+ req.op = op;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_prog,
+ GLUSTERD_MGMT_VOLUME_LOCK,
+ gd_syncop_mgmt_volume_lock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_volume_lock_req);
+ synclock_lock (&conf->big_lock);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+_gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_volume_unlock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ GF_ASSERT(req);
+ GF_ASSERT(iov);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ /* Set peer as locked, so we unlock only the locked peers */
+ if (rsp.op_ret == 0)
+ peerinfo->locked = _gf_true;
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_VOLUME_UNLOCK, peerinfo, rsp.uuid);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_mgmt_volume_unlock_cbk);
+}
+
+int
+gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid, uuid_t txn_id)
+{
+ int ret = -1;
+ gd1_mgmt_volume_unlock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ uuid_copy (req.txn_id, txn_id);
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_prog,
+ GLUSTERD_MGMT_VOLUME_UNLOCK,
+ gd_syncop_mgmt_volume_unlock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_volume_unlock_req);
+ synclock_lock (&conf->big_lock);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
_gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -814,8 +1006,8 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers,
}
int
-gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx,
- char **op_errstr, int npeers)
+gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
+ char **op_errstr, int npeers, uuid_t txn_id)
{
int ret = -1;
int peer_cnt = 0;
@@ -823,6 +1015,9 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx,
xlator_t *this = NULL;
glusterd_peerinfo_t *peerinfo = NULL;
struct syncargs args = {0};
+ struct list_head *peers = NULL;
+
+ peers = &conf->xaction_peers;
if (!npeers) {
ret = 0;
@@ -833,22 +1028,38 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx,
synctask_barrier_init((&args));
peer_cnt = 0;
list_for_each_entry (peerinfo, peers, op_peers_list) {
- /* Reset lock status */
- peerinfo->locked = _gf_false;
- gd_syncop_mgmt_lock (peerinfo, &args, MY_UUID, peer_uuid);
+ if (conf->op_version < 3) {
+ /* Reset lock status */
+ peerinfo->locked = _gf_false;
+ gd_syncop_mgmt_lock (peerinfo, &args,
+ MY_UUID, peer_uuid);
+ } else
+ gd_syncop_mgmt_volume_lock (op, op_ctx, peerinfo, &args,
+ MY_UUID, peer_uuid, txn_id);
peer_cnt++;
}
gd_synctask_barrier_wait((&args), peer_cnt);
- ret = args.op_ret;
- if (ret) {
- gf_asprintf (op_errstr, "Another transaction could be "
- "in progress. Please try again after "
- "sometime.");
- gf_log (this->name, GF_LOG_ERROR, "Failed to acquire lock");
- goto out;
+
+ if (args.op_ret) {
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ else {
+ ret = gf_asprintf (op_errstr, "Another transaction could be "
+ "in progress. Please try again after "
+ "sometime.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to acquire lock");
+
+ }
}
- ret = 0;
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for 'Volume %s' "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
out:
return ret;
}
@@ -1016,9 +1227,10 @@ out:
}
int
-gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret,
+gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int op_ret,
rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr,
- int npeers, gf_boolean_t is_locked)
+ int npeers, char *volname, gf_boolean_t is_acquired,
+ uuid_t txn_id)
{
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_peerinfo_t *tmp = NULL;
@@ -1027,6 +1239,9 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret,
int ret = -1;
xlator_t *this = NULL;
struct syncargs args = {0};
+ struct list_head *peers = NULL;
+
+ peers = &conf->xaction_peers;
if (!npeers) {
ret = 0;
@@ -1035,20 +1250,23 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret,
/* If the lock has not been held during this
* transaction, do not send unlock requests */
- if (!is_locked)
+ if (!is_acquired)
goto out;
this = THIS;
synctask_barrier_init((&args));
peer_cnt = 0;
list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) {
- /* Only unlock peers that were locked */
- if (peerinfo->locked) {
- gd_syncop_mgmt_unlock (peerinfo, &args, MY_UUID,
- tmp_uuid);
- peer_cnt++;
- }
-
+ if (conf->op_version < 3) {
+ /* Only unlock peers that were locked */
+ if (peerinfo->locked)
+ gd_syncop_mgmt_unlock (peerinfo, &args,
+ MY_UUID, tmp_uuid);
+ } else
+ gd_syncop_mgmt_volume_unlock (op_ctx, peerinfo,
+ &args, MY_UUID,
+ tmp_uuid, txn_id);
+ peer_cnt++;
list_del_init (&peerinfo->op_peers_list);
}
gd_synctask_barrier_wait((&args), peer_cnt);
@@ -1061,8 +1279,18 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret,
out:
glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr);
glusterd_op_clear_op (op);
- if (is_locked)
- glusterd_unlock (MY_UUID);
+ if (is_acquired) {
+ /* Based on the op-version, we release the cluster or volume lock */
+ if (conf->op_version < 3)
+ glusterd_unlock (MY_UUID);
+ else {
+ ret = glusterd_volume_unlock (volname, MY_UUID);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to release lock for %s",
+ volname);
+ }
+ }
return 0;
}
@@ -1079,7 +1307,8 @@ gd_get_brick_count (struct list_head *bricks)
}
int
-gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op_errstr)
+gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr)
{
glusterd_pending_node_t *pending_node = NULL;
struct list_head selected = {0,};
@@ -1151,21 +1380,63 @@ out:
void
gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
{
- int ret = -1;
- int npeers = 0;
- dict_t *req_dict = NULL;
- glusterd_conf_t *conf = NULL;
- glusterd_op_t op = 0;
- int32_t tmp_op = 0;
- char *op_errstr = NULL;
- xlator_t *this = NULL;
- gf_boolean_t is_locked = _gf_false;
+ int ret = -1;
+ int npeers = 0;
+ dict_t *req_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_op_t op = 0;
+ int32_t tmp_op = 0;
+ char *op_errstr = NULL;
+ char *tmp = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *txn_id = NULL;
+ uuid_t *originator_uuid = NULL;
+ glusterd_op_info_t txn_opinfo;
this = THIS;
GF_ASSERT (this);
conf = this->private;
GF_ASSERT (conf);
+ /* Generate a transaction-id for this operation and
+ * save it in the dict */
+ txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!txn_id) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_generate (*txn_id);
+
+ ret = dict_set_bin (op_ctx, "transaction_id",
+ txn_id, sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set transaction id.");
+ goto out;
+ } else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Transaction_id = %s", uuid_utoa (*txn_id));
+
+ /* Save the MY_UUID as the originator_uuid */
+ originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*originator_uuid, MY_UUID);
+ ret = dict_set_bin (op_ctx, "originator_uuid",
+ originator_uuid, sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set originator_uuid.");
+ goto out;
+ }
+
ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to get volume "
@@ -1174,26 +1445,71 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
}
op = tmp_op;
- ret = glusterd_lock (MY_UUID);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock");
- gf_asprintf (&op_errstr, "Another transaction is in progress. "
- "Please try again after sometime.");
- goto out;
+
+ /* Based on the op_version, acquire a cluster or volume lock */
+ if (conf->op_version < 3) {
+ ret = glusterd_lock (MY_UUID);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire lock");
+ gf_asprintf (&op_errstr,
+ "Another transaction is in progress. "
+ "Please try again after sometime.");
+ goto out;
+ }
+ } else {
+
+ /* If no volname is given as a part of the command, locks will
+ * not be held */
+ ret = dict_get_str (op_ctx, "volname", &tmp);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Failed to get volume "
+ "name");
+ goto local_locking_done;
+ } else {
+ /* Use a copy of volname, as cli response will be
+ * sent before the unlock, and the volname in the
+ * dict, might be removed */
+ volname = gf_strdup (tmp);
+ if (!volname)
+ goto out;
+ }
+
+ ret = glusterd_volume_lock (volname, MY_UUID);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire lock for %s", volname);
+ gf_asprintf (&op_errstr,
+ "Another transaction is in progress "
+ "for %s. Please try again after sometime.",
+ volname);
+ goto out;
+ }
}
- is_locked = _gf_true;
+ is_acquired = _gf_true;
+
+local_locking_done:
+
+ /* Save opinfo for this transaction with the transaction id */
+ txn_opinfo.op = op;
+ ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set transaction's opinfo");
+
+ opinfo = txn_opinfo;
- /* storing op globally to access in synctask code paths
- * This is still acceptable, as we are performing this under
- * the 'cluster' lock*/
- glusterd_op_set_op (op);
INIT_LIST_HEAD (&conf->xaction_peers);
npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op);
- ret = gd_lock_op_phase (&conf->xaction_peers, op, op_ctx, &op_errstr, npeers);
- if (ret)
- goto out;
+ /* If no volname is given as a part of the command, locks will
+ * not be held */
+ if (volname) {
+ ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, npeers, *txn_id);
+ if (ret)
+ goto out;
+ }
ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx);
if (ret) {
@@ -1220,8 +1536,17 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
ret = 0;
out:
- (void) gd_unlock_op_phase (&conf->xaction_peers, op, ret, req,
- op_ctx, op_errstr, npeers, is_locked);
+ (void) gd_unlock_op_phase (conf, op, ret, req, op_ctx, op_errstr,
+ npeers, volname, is_acquired, *txn_id);
+
+ /* Clearing the transaction opinfo */
+ ret = glusterd_clear_txn_opinfo (txn_id);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to clear transaction's opinfo");
+
+ if (volname)
+ GF_FREE (volname);
if (req_dict)
dict_unref (req_dict);