diff options
Diffstat (limited to 'xlators/mgmt/glusterd/src')
18 files changed, 955 insertions, 234 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 938663ba863..c78fbd8345c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -2905,18 +2905,24 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)                  defrag_cmd = GF_DEFRAG_CMD_START_FORCE;                  if (cmd == GF_OP_CMD_DETACH_START)                          defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER; +                /* +                 * We need to set this *before* we issue commands to the +                 * bricks, or else we might end up setting it after the bricks +                 * have responded.  If we fail to send the request(s) we'll +                 * clear it ourselves because nobody else will. +                 */ +                volinfo->decommission_in_progress = 1;                  ret = glusterd_handle_defrag_start                          (volinfo, err_str, sizeof (err_str),                           defrag_cmd,                           glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); -                if (!ret) -                        volinfo->decommission_in_progress = 1; -                  if (ret) {                          gf_msg (this->name, GF_LOG_ERROR, 0,                                  GD_MSG_REBALANCE_START_FAIL,                                  "failed to start the rebalance"); +                        /* TBD: shouldn't we do more than print a message? */ +                        volinfo->decommission_in_progress = 0;                  }          } else {                  if (GLUSTERD_STATUS_STARTED == volinfo->status) diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 364623317ef..b6f0197aa19 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -3365,7 +3365,8 @@ int  glusterd_rpc_create (struct rpc_clnt **rpc,                       dict_t *options,                       rpc_clnt_notify_t notify_fn, -                     void *notify_data) +                     void *notify_data, +                     gf_boolean_t force)  {          struct rpc_clnt         *new_rpc = NULL;          int                     ret = -1; @@ -3376,6 +3377,11 @@ glusterd_rpc_create (struct rpc_clnt **rpc,          GF_ASSERT (options); +        if (force && rpc && *rpc) { +                (void) rpc_clnt_unref (*rpc); +                *rpc = NULL; +        } +          /* TODO: is 32 enough? or more ? */          new_rpc = rpc_clnt_new (options, this, this->name, 16);          if (!new_rpc) @@ -3531,7 +3537,8 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,          }          ret = glusterd_rpc_create (&peerinfo->rpc, options, -                                   glusterd_peer_rpc_notify, peerctx); +                                   glusterd_peer_rpc_notify, peerctx, +                                   _gf_false);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0,                          GD_MSG_RPC_CREATE_FAIL, @@ -4638,6 +4645,7 @@ gd_is_global_option (char *opt_key)          return (strcmp (opt_key, GLUSTERD_SHARED_STORAGE_KEY) == 0 ||                  strcmp (opt_key, GLUSTERD_QUORUM_RATIO_KEY) == 0 ||                  strcmp (opt_key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0 || +                strcmp (opt_key, GLUSTERD_BRICK_MULTIPLEX_KEY) == 0 ||                  strcmp (opt_key, GLUSTERD_MAX_OP_VERSION_KEY) == 0);  out: @@ -5308,8 +5316,6 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)                                   count, brickinfo->rdma_port);                          fprintf (fp, "Volume%d.Brick%d.status: %s\n", count_bkp,                                   count, brickinfo->status ? "Started" : "Stopped"); -                        fprintf (fp, "Volume%d.Brick%d.signedin: %s\n", count_bkp, -                                 count, brickinfo->signed_in ? "True" : "False");                          /*FIXME: This is a hacky way of figuring out whether a                           * brick belongs to the hot or cold tier */ @@ -5495,6 +5501,9 @@ __glusterd_handle_get_state (rpcsvc_request_t *req)          GF_VALIDATE_OR_GOTO (THIS->name, this, out);          GF_VALIDATE_OR_GOTO (this->name, req, out); +        gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DAEMON_STATE_REQ_RCVD, +                "Received request to get state for glusterd"); +          ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);          if (ret < 0) {                  snprintf (err_str, sizeof (err_str), "Failed to decode " @@ -5525,14 +5534,17 @@ __glusterd_handle_get_state (rpcsvc_request_t *req)                  }          } -        gf_msg (this->name, GF_LOG_INFO, 0, GD_MSG_DAEMON_STATE_REQ_RCVD, -                "Received request to get state for glusterd"); -          ret = glusterd_get_state (req, dict);  out: -        if (dict) +        if (dict && ret) { +                /* +                 * When glusterd_to_cli (called from glusterd_get_state) +                 * succeeds, it frees the dict for us, so this would be a +                 * double free, but in other cases it's our responsibility. +                 */                  dict_unref (dict); +        }          return ret;  } @@ -5658,6 +5670,20 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,          case RPC_CLNT_DISCONNECT:                  rpc_clnt_unset_connected (&rpc->conn); +                if (rpc != brickinfo->rpc) { +                        /* +                         * There used to be a bunch of races in the volume +                         * start/stop code that could result in us getting here +                         * and setting the brick status incorrectly.  Many of +                         * those have been fixed or avoided, but just in case +                         * any are still left it doesn't hurt to keep the extra +                         * check and avoid further damage. +                         */ +                        gf_log (this->name, GF_LOG_WARNING, +                                "got disconnect from stale rpc on %s", +                                brickinfo->path); +                        break; +                }                  if (glusterd_is_brick_started (brickinfo)) {                          gf_msg (this->name, GF_LOG_INFO, 0,                                  GD_MSG_BRICK_DISCONNECTED, diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index c1392734d79..96d39f03007 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -178,7 +178,7 @@ out:          return ret;  } -static size_t +size_t  build_volfile_path (char *volume_id, char *path,                      size_t path_len, char *trusted_str)  { @@ -841,6 +841,7 @@ __server_getspec (rpcsvc_request_t *req)          peerinfo = &req->trans->peerinfo;          volume = args.key; +          /* Need to strip leading '/' from volnames. This was introduced to           * support nfs style mount parameters for native gluster mount           */ diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 00de88f4e36..5f1339cb5fd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -28,7 +28,7 @@   *       - Append to the list of messages defined, towards the end   *       - Retain macro naming as glfs_msg_X (for redability across developers)   * NOTE: Rules for message format modifications - * 3) Check acorss the code if the message ID macro in question is reused + * 3) Check across the code if the message ID macro in question is reused   *    anywhere. If reused then then the modifications should ensure correctness   *    everywhere, or needs a new message ID as (1) above was not adhered to. If   *    not used anywhere, proceed with the required modification. @@ -41,7 +41,7 @@  #define GLUSTERD_COMP_BASE      GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES       595 +#define GLFS_NUM_MESSAGES       597  #define GLFS_MSGID_END          (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)  /* Messaged with message IDs */ @@ -4817,5 +4817,18 @@   */  /*------------*/ + +#define GD_MSG_BRICK_MX_SET_FAIL                   (GLUSTERD_COMP_BASE + 596) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ + +#define GD_MSG_NO_SIG_TO_PID_ZERO                  (GLUSTERD_COMP_BASE + 597) + +/*------------*/ +  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"  #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index b24e91a457c..d9b18e00195 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -58,16 +58,27 @@ static int  glusterd_set_shared_storage (dict_t *dict, char *key, char *value,                               char **op_errstr); -/* Valid options for all volumes to be listed in the * - * valid_all_vol_opts table. To add newer options to * - * all volumes, we can just add more entries to this * - * table                                             * +/* + * Valid options for all volumes to be listed in the valid_all_vol_opts table. + * To add newer options to all volumes, we can just add more entries to this + * table. + * + * It's important that every value have a default, or have a special handler + * in glusterd_get_global_options_for_all_vols, or else we might crash there.   */  glusterd_all_vol_opts valid_all_vol_opts[] = { -        { GLUSTERD_QUORUM_RATIO_KEY }, -        { GLUSTERD_SHARED_STORAGE_KEY }, -        { GLUSTERD_GLOBAL_OP_VERSION_KEY }, -        { GLUSTERD_MAX_OP_VERSION_KEY }, +        { GLUSTERD_QUORUM_RATIO_KEY,            "0" }, +        { GLUSTERD_SHARED_STORAGE_KEY,          "disable" }, +        /* This one actually gets filled in dynamically. */ +        { GLUSTERD_GLOBAL_OP_VERSION_KEY,       "BUG_NO_OP_VERSION"}, +        /* +         * This one should be filled in dynamically, but it didn't used to be +         * (before the defaults were added here) so the value is unclear. +         * +         * TBD: add a dynamic handler to set the appropriate value +         */ +        { GLUSTERD_MAX_OP_VERSION_KEY,          "BUG_NO_MAX_OP_VERSION"}, +        { GLUSTERD_BRICK_MULTIPLEX_KEY,         "disable"},          { NULL },  }; @@ -557,7 +568,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin                  if (!brick_req)                          goto out;                  brick_req->op = GLUSTERD_BRICK_TERMINATE; -                brick_req->name = ""; +                brick_req->name = brickinfo->path;                  glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPING);                  break;          case GD_OP_PROFILE_VOLUME: @@ -618,28 +629,13 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin                  break;          case GD_OP_SNAP: -                brick_req = GF_CALLOC (1, sizeof (*brick_req), -                                       gf_gld_mt_mop_brick_req_t); -                if (!brick_req) -                        goto out; - -                brick_req->op = GLUSTERD_BRICK_BARRIER; -                ret = dict_get_str (dict, "volname", &volname); -                if (ret) -                        goto out; -                brick_req->name = gf_strdup (volname); - -                break;          case GD_OP_BARRIER:                  brick_req = GF_CALLOC (1, sizeof(*brick_req),                                         gf_gld_mt_mop_brick_req_t);                  if (!brick_req)                          goto out;                  brick_req->op = GLUSTERD_BRICK_BARRIER; -                ret = dict_get_str(dict, "volname", &volname); -                if (ret) -                        goto out; -                brick_req->name = gf_strdup (volname); +                brick_req->name = brickinfo->path;                  break;          default: @@ -754,6 +750,17 @@ out:  }  static int +glusterd_validate_brick_mx_options (xlator_t *this, char *fullkey, char *value, +                                    char **op_errstr) +{ +        int             ret = 0; + +        //Placeholder function for now + +        return ret; +} + +static int  glusterd_validate_shared_storage (char *key, char *value, char *errstr)  {          int32_t            ret                      = -1; @@ -1191,6 +1198,11 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)                  if (ret)                          goto out; +                ret = glusterd_validate_brick_mx_options (this, key, value, +                                                          op_errstr); +                if (ret) +                        goto out; +                  local_key_op_version = glusterd_get_op_version_for_key (key);                  if (local_key_op_version > local_new_op_version)                          local_new_op_version = local_key_op_version; @@ -2351,6 +2363,33 @@ out:  }  static int +glusterd_set_brick_mx_opts (dict_t *dict, char *key, char *value, +                            char **op_errstr) +{ +        int32_t       ret                  = -1; +        xlator_t     *this                 = NULL; +        glusterd_conf_t *priv              = NULL; + +        this = THIS; +        GF_VALIDATE_OR_GOTO ("glusterd", this, out); +        GF_VALIDATE_OR_GOTO (this->name, dict, out); +        GF_VALIDATE_OR_GOTO (this->name, key, out); +        GF_VALIDATE_OR_GOTO (this->name, value, out); +        GF_VALIDATE_OR_GOTO (this->name, op_errstr, out); + +        ret = 0; + +        priv = this->private; + +        if (!strcmp (key, GLUSTERD_BRICK_MULTIPLEX_KEY)) { +                ret = dict_set_dynstr (priv->opts, key, gf_strdup (value)); +        } + +out: +        return ret; +} + +static int  glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict,                                      char **op_errstr)  { @@ -2399,6 +2438,14 @@ glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict,                  goto out;          } +        ret = glusterd_set_brick_mx_opts (dict, key, value, op_errstr); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        GD_MSG_BRICK_MX_SET_FAIL, +                        "Failed to set brick multiplexing option"); +                goto out; +        } +          /* If the key is cluster.op-version, set conf->op_version to the value           * if needed and save it.           */ @@ -2629,6 +2676,7 @@ out:  } +  static int  glusterd_op_set_volume (dict_t *dict, char **errstr)  { @@ -6094,6 +6142,8 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,          glusterd_volinfo_t                      *volinfo = NULL;          glusterd_brickinfo_t                    *brickinfo = NULL;          glusterd_pending_node_t                 *pending_node = NULL; +        glusterd_conf_t                         *conf = THIS->private; +        char                                    pidfile[1024];          ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);          if (ret) @@ -6122,6 +6172,18 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,                                                     selected);                                  pending_node = NULL;                          } +                        /* +                         * This is not really the right place to do it, but +                         * it's the most convenient. +                         * TBD: move this to *after* the RPC +                         */ +                        brickinfo->status = GF_BRICK_STOPPED; +                        brickinfo->started_here = _gf_false; +                        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, +                                                    brickinfo, conf); +                        gf_log (THIS->name, GF_LOG_INFO, +                                "unlinking pidfile %s", pidfile); +                        (void) sys_unlink (pidfile);                  }          } @@ -6144,7 +6206,8 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,          glusterd_pending_node_t                 *pending_node = NULL;          int32_t                                 command = 0;          int32_t                                 force = 0; - +        glusterd_conf_t                         *conf = THIS->private; +        char                                    pidfile[1024];          ret = dict_get_str (dict, "volname", &volname); @@ -6218,6 +6281,18 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,                                                     selected);                                  pending_node = NULL;                          } +                        /* +                         * This is not really the right place to do it, but +                         * it's the most convenient. +                         * TBD: move this to *after* the RPC +                         */ +                        brickinfo->status = GF_BRICK_STOPPED; +                        brickinfo->started_here = _gf_false; +                        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, +                                                    brickinfo, conf); +                        gf_log (THIS->name, GF_LOG_INFO, +                                "unlinking pidfile %s", pidfile); +                        (void) sys_unlink (pidfile);                  }                  i++;          } diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 142f7ba89f7..48275c57e12 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -166,7 +166,8 @@ typedef enum cli_cmd_type_ {   } cli_cmd_type;  typedef struct glusterd_all_volume_options { -        char          *option; +        char    *option; +        char    *dflt_val;  } glusterd_all_vol_opts;  int diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 2c27473f190..2e87ff6ecdf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -93,25 +93,21 @@ pmap_registry_get (xlator_t *this)  } -static char* -nextword (char *str) -{ -        while (*str && !isspace (*str)) -                str++; -        while (*str && isspace (*str)) -                str++; - -        return str; -} - +/* + * The "destroy" argument avoids a double search in pmap_registry_remove - one + * to find the entry in the table, and the other to find the particular + * brickname within that entry (which might cover multiple bricks).  We do the + * actual deletion here by "whiting out" the brick name with spaces.  It's up + * to pmap_registry_remove to figure out what to do from there. + */  int  pmap_registry_search (xlator_t *this, const char *brickname, -                      gf_pmap_port_type_t type) +                      gf_pmap_port_type_t type, gf_boolean_t destroy)  {          struct pmap_registry *pmap = NULL;          int                   p = 0;          char                 *brck = NULL; -        char                 *nbrck = NULL; +        size_t                i;          pmap = pmap_registry_get (this); @@ -119,13 +115,38 @@ pmap_registry_search (xlator_t *this, const char *brickname,                  if (!pmap->ports[p].brickname || pmap->ports[p].type != type)                          continue; -                for (brck = pmap->ports[p].brickname;;) { -                        nbrck = strtail (brck, brickname); -                        if (nbrck && (!*nbrck || isspace (*nbrck))) -                                return p; -                        brck = nextword (brck); -                        if (!*brck) +                brck = pmap->ports[p].brickname; +                for (;;) { +                        for (i = 0; brck[i] && !isspace (brck[i]); ++i) +                                ; +                        if (!i) {                                  break; +                        } +                        if (strncmp (brck, brickname, i) == 0) { +                                /* +                                 * Without this check, we'd break when brck +                                 * is merely a substring of brickname. +                                 */ +                                if (brickname[i] == '\0') { +                                        if (destroy) do { +                                                *(brck++) = ' '; +                                        } while (--i); +                                        return p; +                                } +                        } +                        brck += i; +                        /* +                         * Skip over *any* amount of whitespace, including +                         * none (if we're already at the end of the string). +                         */ +                        while (isspace (*brck)) +                                ++brck; +                        /* +                         * We're either at the end of the string (which will be +                         * handled above strncmp on the next iteration) or at +                         * the next non-whitespace substring (which will be +                         * handled by strncmp itself). +                         */                  }          } @@ -240,8 +261,13 @@ pmap_registry_bind (xlator_t *this, int port, const char *brickname,          p = port;          pmap->ports[p].type = type; -        free (pmap->ports[p].brickname); -        pmap->ports[p].brickname = strdup (brickname); +        if (pmap->ports[p].brickname) { +                char *tmp = pmap->ports[p].brickname; +                asprintf (&pmap->ports[p].brickname, "%s %s", tmp, brickname); +                free (tmp); +        } else { +                pmap->ports[p].brickname = strdup (brickname); +        }          pmap->ports[p].type = type;          pmap->ports[p].xprt = xprt; @@ -256,12 +282,69 @@ out:  }  int +pmap_registry_extend (xlator_t *this, int port, const char *brickname) +{ +        struct pmap_registry *pmap = NULL; +        char                 *old_bn; +        char                 *new_bn; +        size_t               bn_len; +        char                 *entry; +        int                  found = 0; + +        pmap = pmap_registry_get (this); + +        if (port > GF_PORT_MAX) { +                return -1; +        } + +        switch (pmap->ports[port].type) { +        case GF_PMAP_PORT_LEASED: +        case GF_PMAP_PORT_BRICKSERVER: +                break; +        default: +                return -1; +        } + +        old_bn = pmap->ports[port].brickname; +        if (old_bn) { +                bn_len = strlen(brickname); +                entry = strstr (old_bn, brickname); +                while (entry) { +                        found = 1; +                        if ((entry != old_bn) && (entry[-1] != ' ')) { +                                found = 0; +                        } +                        if ((entry[bn_len] != ' ') && (entry[bn_len] != '\0')) { +                                found = 0; +                        } +                        if (found) { +                                return 0; +                        } +                        entry = strstr (entry + bn_len, brickname); +                } +                asprintf (&new_bn, "%s %s", old_bn, brickname); +        } else { +                new_bn = strdup (brickname); +        } + +        if (!new_bn) { +                return -1; +        } + +        pmap->ports[port].brickname = new_bn; +        free (old_bn); + +        return 0; +} + +int  pmap_registry_remove (xlator_t *this, int port, const char *brickname,                        gf_pmap_port_type_t type, void *xprt)  {          struct pmap_registry *pmap = NULL;          int                   p = 0;          glusterd_conf_t      *priv = NULL; +        char                 *brick_str;          priv = this->private;          pmap = priv->pmap; @@ -277,7 +360,7 @@ pmap_registry_remove (xlator_t *this, int port, const char *brickname,          }          if (brickname && strchr (brickname, '/')) { -                p = pmap_registry_search (this, brickname, type); +                p = pmap_registry_search (this, brickname, type, _gf_true);                  if (p)                          goto remove;          } @@ -294,11 +377,29 @@ remove:                  GD_MSG_BRICK_REMOVE, "removing brick %s on port %d",                  pmap->ports[p].brickname, p); -        free (pmap->ports[p].brickname); +        if (xprt && (xprt == pmap->ports[p].xprt)) { +                pmap->ports[p].xprt = NULL; +        } -        pmap->ports[p].type = GF_PMAP_PORT_FREE; -        pmap->ports[p].brickname = NULL; -        pmap->ports[p].xprt = NULL; +        /* +         * This is where we garbage-collect.  If all of the brick names have +         * been "whited out" by pmap_registry_search(...,destroy=_gf_true) and +         * there's no xprt either, then we have nothing left worth saving and +         * can delete the entire entry. +         */ +        if (!pmap->ports[p].xprt) { +                brick_str = pmap->ports[p].brickname; +                if (brick_str) { +                        while (*brick_str != '\0') { +                                if (*(brick_str++) != ' ') { +                                        goto out; +                                } +                        } +                } +                free (pmap->ports[p].brickname); +                pmap->ports[p].brickname = NULL; +                pmap->ports[p].type = GF_PMAP_PORT_FREE; +        }  out:          return 0; @@ -322,7 +423,8 @@ __gluster_pmap_portbybrick (rpcsvc_request_t *req)          brick = args.brick; -        port = pmap_registry_search (THIS, brick, GF_PMAP_PORT_BRICKSERVER); +        port = pmap_registry_search (THIS, brick, GF_PMAP_PORT_BRICKSERVER, +                                     _gf_false);          if (!port)                  rsp.op_ret = -1; @@ -380,15 +482,6 @@ gluster_pmap_brickbyport (rpcsvc_request_t *req)  } -static int -glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo, -                              gf_boolean_t value) -{ -        brickinfo->signed_in = value; - -        return 0; -} -  int  __gluster_pmap_signin (rpcsvc_request_t *req)  { @@ -413,9 +506,6 @@ fail:                                 (xdrproc_t)xdr_pmap_signin_rsp);          free (args.brick);//malloced by xdr -        if (!ret) -                glusterd_brick_update_signin (brickinfo, _gf_true); -          return 0;  } @@ -454,9 +544,6 @@ __gluster_pmap_signout (rpcsvc_request_t *req)                                  req->trans);          } -        if (!ret) -                glusterd_brick_update_signin (brickinfo, _gf_false); -  fail:          glusterd_submit_reply (req, &rsp, NULL, 0, NULL,                                 (xdrproc_t)xdr_pmap_signout_rsp); diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h index 14187daee2b..9965a9577b5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.h +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h @@ -40,10 +40,11 @@ int pmap_mark_port_leased (xlator_t *this, int port);  int pmap_registry_alloc (xlator_t *this);  int pmap_registry_bind (xlator_t *this, int port, const char *brickname,                          gf_pmap_port_type_t type, void *xprt); +int pmap_registry_extend (xlator_t *this, int port, const char *brickname);  int pmap_registry_remove (xlator_t *this, int port, const char *brickname,                            gf_pmap_port_type_t type, void *xprt);  int pmap_registry_search (xlator_t *this, const char *brickname, -                          gf_pmap_port_type_t type); +                          gf_pmap_port_type_t type, gf_boolean_t destroy);  struct pmap_registry *pmap_registry_get (xlator_t *this);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 00b84e076c3..bc6cddea7f7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -315,7 +315,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,          sleep (5); -        ret = glusterd_rebalance_rpc_create (volinfo, _gf_false); +        ret = glusterd_rebalance_rpc_create (volinfo);          //FIXME: this cbk is passed as NULL in all occurrences. May be          //we never needed it. @@ -363,8 +363,7 @@ out:  }  int -glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, -                               gf_boolean_t reconnect) +glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo)  {          dict_t                  *options = NULL;          char                     sockfile[PATH_MAX] = {0,}; @@ -383,35 +382,27 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,          if (!defrag)                  goto out; -        //rpc obj for rebalance process already in place. -        if (glusterd_defrag_rpc_get (defrag)) { -                ret = 0; -                glusterd_defrag_rpc_put (defrag); -                goto out; -        }          GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo); -        /* If reconnecting check if defrag sockfile exists in the new location +        /* Check if defrag sockfile exists in the new location           * in /var/run/ , if it does not try the old location           */ -        if (reconnect) { -                ret = sys_stat (sockfile, &buf); -                /* TODO: Remove this once we don't need backward compatibility -                 * with the older path -                 */ -                if (ret && (errno == ENOENT)) { -                        gf_msg (this->name, GF_LOG_WARNING, errno, -                                GD_MSG_FILE_OP_FAILED, "Rebalance sockfile " -                                "%s does not exist. Trying old path.", -                                sockfile); -                        GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo, -                                                           priv); -                        ret =sys_stat (sockfile, &buf); -                        if (ret && (ENOENT == errno)) { -                                gf_msg (this->name, GF_LOG_ERROR, 0, -                                        GD_MSG_REBAL_NO_SOCK_FILE, "Rebalance " -                                        "sockfile %s does not exist", sockfile); -                                goto out; -                        } +        ret = sys_stat (sockfile, &buf); +        /* TODO: Remove this once we don't need backward compatibility +         * with the older path +         */ +        if (ret && (errno == ENOENT)) { +                gf_msg (this->name, GF_LOG_WARNING, errno, +                        GD_MSG_FILE_OP_FAILED, "Rebalance sockfile " +                        "%s does not exist. Trying old path.", +                        sockfile); +                GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo, +                                                   priv); +                ret =sys_stat (sockfile, &buf); +                if (ret && (ENOENT == errno)) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                GD_MSG_REBAL_NO_SOCK_FILE, "Rebalance " +                                "sockfile %s does not exist", sockfile); +                        goto out;                  }          } @@ -429,7 +420,7 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,          glusterd_volinfo_ref (volinfo);          ret = glusterd_rpc_create (&defrag->rpc, options, -                                   glusterd_defrag_notify, volinfo); +                                   glusterd_defrag_notify, volinfo, _gf_true);          if (ret) {                  gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,                          "Glusterd RPC creation failed"); diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index eb1a714bfd5..fb29c6efcfd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -326,22 +326,6 @@ out:          return ret;  } -static int -rb_kill_destination_brick (glusterd_volinfo_t *volinfo, -                           glusterd_brickinfo_t *dst_brickinfo) -{ -        glusterd_conf_t  *priv               = NULL; -        char              pidfile[PATH_MAX]  = {0,}; - -        priv = THIS->private; - -        snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s", -                  priv->workdir, volinfo->volname, -                  RB_DSTBRICK_PIDFILE); - -        return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true); -} -  int  glusterd_op_perform_replace_brick (glusterd_volinfo_t  *volinfo, @@ -526,17 +510,6 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)                  goto out;          } -        if (gf_is_local_addr (dst_brickinfo->hostname)) { -                gf_msg_debug (this->name, 0, "I AM THE DESTINATION HOST"); -                ret = rb_kill_destination_brick (volinfo, dst_brickinfo); -                if (ret) { -                        gf_msg (this->name, GF_LOG_CRITICAL, 0, -                                GD_MSG_BRK_CLEANUP_FAIL, -                                "Unable to cleanup dst brick"); -                        goto out; -                } -        } -          ret = glusterd_svcs_stop (volinfo);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index 6a350361998..c75a1011fb3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -886,19 +886,6 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)                          goto out;                  } -                /* Restore is successful therefore delete the original volume's -                 * volinfo. If the volinfo is already restored then we should -                 * delete the backend LVMs */ -                if (!gf_uuid_is_null (parent_volinfo->restored_from_snap)) { -                        ret = glusterd_lvm_snapshot_remove (rsp_dict, -                                                            parent_volinfo); -                        if (ret) { -                                gf_msg (this->name, GF_LOG_ERROR, 0, -                                        GD_MSG_LVM_REMOVE_FAILED, -                                        "Failed to remove LVM backend"); -                        } -                } -                  /* Detach the volinfo from priv->volumes, so that no new                   * command can ref it any more and then unref it.                   */ @@ -2847,13 +2834,12 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,          GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv);          if (gf_is_service_running (pidfile, &pid)) { -                ret = kill (pid, SIGKILL); -                if (ret && errno != ESRCH) { -                        gf_msg (this->name, GF_LOG_ERROR, errno, -                                GD_MSG_PID_KILL_FAIL, "Unable to kill pid " -                                "%d reason : %s", pid, strerror(errno)); -                        goto out; -                } +                int send_attach_req (xlator_t *this, struct rpc_clnt *rpc, +                                     char *path, int op); +                (void) send_attach_req (this, brickinfo->rpc, +                                        brickinfo->path, +                                        GLUSTERD_BRICK_TERMINATE); +                brickinfo->status = GF_BRICK_STOPPED;          }          /* Check if the brick is mounted and then try unmounting the brick */ @@ -2895,13 +2881,28 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,                          "path %s (brick: %s): %s. Retry(%d)", mount_pt,                          brickinfo->path, strerror (errno), retry_count); -                sleep (1); +                /* +                 * This used to be one second, but that wasn't long enough +                 * to get past the spurious EPERM errors that prevent some +                 * tests (especially bug-1162462.t) from passing reliably. +                 * +                 * TBD: figure out where that garbage is coming from +                 */ +                sleep (3);          }          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0,                          GD_MSG_UNOUNT_FAILED, "umount failed for "                          "path %s (brick: %s): %s.", mount_pt,                          brickinfo->path, strerror (errno)); +                /* +                 * This is cheating, but necessary until we figure out how to +                 * shut down a brick within a still-living brick daemon so that +                 * random translators aren't keeping the mountpoint alive. +                 * +                 * TBD: figure out a real solution +                 */ +                ret = 0;                  goto out;          } @@ -7599,20 +7600,21 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,                  GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo,                                              brickinfo, priv); -                ret = gf_is_service_running (pidfile, &pid); -                ret = snprintf (key, sizeof (key), "%s.brick%d.pid", -                                keyprefix, index); -                if (ret < 0) { -                        goto out; -                } +                if (gf_is_service_running (pidfile, &pid)) { +                        ret = snprintf (key, sizeof (key), "%s.brick%d.pid", +                                        keyprefix, index); +                        if (ret < 0) { +                                goto out; +                        } -                ret = dict_set_int32 (rsp_dict, key, pid); -                if (ret) { -                        gf_msg (this->name, GF_LOG_ERROR, 0, -                                GD_MSG_DICT_SET_FAILED, -                                "Could not save pid %d", pid); -                        goto out; +                        ret = dict_set_int32 (rsp_dict, key, pid); +                        if (ret) { +                                gf_msg (this->name, GF_LOG_ERROR, 0, +                                        GD_MSG_DICT_SET_FAILED, +                                        "Could not save pid %d", pid); +                                goto out; +                        }                  }          } diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 970aed2924c..07501f2407d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -152,8 +152,6 @@ gd_brick_op_req_free (gd1_mgmt_brick_op_req *req)          if (!req)                  return; -        if (strcmp (req->name, "") != 0) -                GF_FREE (req->name);          GF_FREE (req->input.input_val);          GF_FREE (req);  } @@ -998,6 +996,21 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,                          goto out;                  }          } + +        if (req->op == GLUSTERD_BRICK_TERMINATE) { +                if (args.op_ret && (args.op_errno == ENOTCONN)) { +                        /* +                         * This is actually OK.  It happens when the target +                         * brick process exits and we saw the closed connection +                         * before we read the response.  If we didn't read the +                         * response quickly enough that's kind of our own +                         * fault, and the fact that the process exited means +                         * that our goal of terminating the brick was achieved. +                         */ +                        args.op_ret = 0; +                } +        } +          if (args.op_ret == 0)                  glusterd_handle_node_rsp (dict_out, pnode->node, op,                                            args.dict, op_ctx, errstr, diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 5f9098f3e9d..5cad58cbb2e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -93,6 +93,30 @@  #define NLMV4_VERSION       4  #define NLMV1_VERSION       1 +int +send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op); + +static gf_boolean_t +is_brick_mx_enabled () +{ +        char            *value = NULL; +        int             ret = 0; +        gf_boolean_t    enabled = _gf_false; +        xlator_t        *this = NULL; +        glusterd_conf_t *priv = NULL; + +        this = THIS; + +        priv = this->private; + +        ret = dict_get_str (priv->opts, GLUSTERD_BRICK_MULTIPLEX_KEY, &value); + +        if (!ret) +                ret = gf_string2boolean (value, &enabled); + +        return ret ? _gf_false: enabled; +} +  extern struct volopt_map_entry glusterd_volopt_map[];  extern glusterd_all_vol_opts valid_all_vol_opts[]; @@ -1690,8 +1714,6 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,                                      glusterd_brickinfo_t *brickinfo,                                      char *sockpath, size_t len)  { -        char                    export_path[PATH_MAX] = {0,}; -        char                    sock_filepath[PATH_MAX] = {0,};          char                    volume_dir[PATH_MAX] = {0,};          xlator_t                *this = NULL;          glusterd_conf_t         *priv = NULL; @@ -1706,11 +1728,18 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,          priv = this->private;          GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv); -        GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path); -        snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s", -                  volume_dir, brickinfo->hostname, export_path); +        if (is_brick_mx_enabled ()) { +                snprintf (sockpath, len, "%s/run/daemon-%s.socket", +                          volume_dir, brickinfo->hostname); +        } else { +                char                    export_path[PATH_MAX] = {0,}; +                char                    sock_filepath[PATH_MAX] = {0,}; +                GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path); +                snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s", +                          volume_dir, brickinfo->hostname, export_path); -        glusterd_set_socket_filepath (sock_filepath, sockpath, len); +                glusterd_set_socket_filepath (sock_filepath, sockpath, len); +        }  }  /* connection happens only if it is not aleady connected, @@ -1749,7 +1778,7 @@ glusterd_brick_connect (glusterd_volinfo_t  *volinfo,                  ret = glusterd_rpc_create (&rpc, options,                                             glusterd_brick_rpc_notify, -                                           brickid); +                                           brickid, _gf_false);                  if (ret) {                          GF_FREE (brickid);                          goto out; @@ -1802,6 +1831,8 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,          char                    glusterd_uuid[1024] = {0,};          char                    valgrind_logfile[PATH_MAX] = {0};          char                    rdma_brick_path[PATH_MAX] = {0,}; +        struct rpc_clnt         *rpc = NULL; +        rpc_clnt_connection_t   *conn  = NULL;          GF_ASSERT (volinfo);          GF_ASSERT (brickinfo); @@ -1823,16 +1854,33 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t  *volinfo,                  goto out;          } -        ret = _mk_rundir_p (volinfo); -        if (ret) -                goto out; +        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); +        if (gf_is_service_running (pidfile, NULL)) { +                goto connect; +        } +        /* +         * There are all sorts of races in the start/stop code that could leave +         * a UNIX-domain socket or RPC-client object associated with a +         * long-dead incarnation of this brick, while the new incarnation is +         * listening on a new socket at the same path and wondering why we +         * haven't shown up.  To avoid the whole mess and be on the safe side, +         * we just blow away anything that might have been left over, and start +         * over again. +         */          glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath,                                              sizeof (socketpath)); - -        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); -        if (gf_is_service_running (pidfile, NULL)) -                goto connect; +        (void) glusterd_unlink_file (socketpath); +        rpc = brickinfo->rpc; +        if (rpc) { +                brickinfo->rpc = NULL; +                conn = &rpc->conn; +                if (conn->reconnect) { +                        (void ) gf_timer_call_cancel (rpc->ctx, conn->reconnect); +                        //rpc_clnt_unref (rpc); +                } +                rpc_clnt_unref (rpc); +        }          port = pmap_assign_port (THIS, brickinfo->port, brickinfo->path); @@ -1933,6 +1981,7 @@ retry:          brickinfo->port = port;          brickinfo->rdma_port = rdma_port; +        brickinfo->started_here = _gf_true;          if (wait) {                  synclock_unlock (&priv->big_lock); @@ -1978,6 +2027,7 @@ connect:                          brickinfo->hostname, brickinfo->path, socketpath);                  goto out;          } +  out:          return ret;  } @@ -2035,9 +2085,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t  *volinfo,                                  gf_boolean_t del_brick)  {          xlator_t        *this                   = NULL; -        glusterd_conf_t *priv                   = NULL; -        char            pidfile[PATH_MAX]       = {0,};          int             ret                     = 0; +        char            *op_errstr              = NULL;          GF_ASSERT (volinfo);          GF_ASSERT (brickinfo); @@ -2045,18 +2094,32 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t  *volinfo,          this = THIS;          GF_ASSERT (this); -        priv = this->private;          if (del_brick)                  cds_list_del_init (&brickinfo->brick_list);          if (GLUSTERD_STATUS_STARTED == volinfo->status) { -                (void) glusterd_brick_disconnect (brickinfo); -                GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); -                ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false); -                if (ret == 0) { -                        glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); -                        (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo); +                /* +                 * In a post-multiplexing world, even if we're not actually +                 * doing any multiplexing, just dropping the RPC connection +                 * isn't enough.  There might be many such connections during +                 * the brick daemon's lifetime, even if we only consider the +                 * management RPC port (because tests etc. might be manually +                 * attaching and detaching bricks).  Therefore, we have to send +                 * an actual signal instead. +                 */ +                if (is_brick_mx_enabled ()) { +                        (void) send_attach_req (this, brickinfo->rpc, +                                                brickinfo->path, +                                                GLUSTERD_BRICK_TERMINATE); +                } else { +                        (void) glusterd_brick_terminate (volinfo, brickinfo, +                                                         NULL, 0, &op_errstr); +                        if (op_errstr) { +                                GF_FREE (op_errstr); +                        } +                        (void) glusterd_brick_disconnect (brickinfo);                  } +                ret = 0;          }          if (del_brick) @@ -4843,16 +4906,350 @@ out:          return ret;  } +static int32_t +my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame) +{ +        call_frame_t    *frame  = v_frame; + +        STACK_DESTROY (frame->root); + +        return 0; +} + +int +send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op) +{ +        int            ret      = -1; +        struct iobuf  *iobuf    = NULL; +        struct iobref *iobref   = NULL; +        struct iovec   iov      = {0, }; +        ssize_t        req_size = 0; +        call_frame_t  *frame    = NULL; +        gd1_mgmt_brick_op_req   brick_req; +        void                    *req = &brick_req; +        void          *errlbl   = &&err; +        extern struct rpc_clnt_program gd_brick_prog; + +        if (!rpc) { +                gf_log (this->name, GF_LOG_ERROR, "called with null rpc"); +                return -1; +        } + +        brick_req.op = op; +        brick_req.name = path; +        brick_req.input.input_val = NULL; +        brick_req.input.input_len = 0; + +        req_size = xdr_sizeof ((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); +        iobuf = iobuf_get2 (rpc->ctx->iobuf_pool, req_size); +        if (!iobuf) { +                goto *errlbl; +        } +        errlbl = &&maybe_free_iobuf; + +        iov.iov_base = iobuf->ptr; +        iov.iov_len  = iobuf_pagesize (iobuf); + +        iobref = iobref_new (); +        if (!iobref) { +                goto *errlbl; +        } +        errlbl = &&free_iobref; + +        frame = create_frame (this, this->ctx->pool); +        if (!frame) { +                goto *errlbl; +        } + +        iobref_add (iobref, iobuf); +        /* +         * Drop our reference to the iobuf.  The iobref should already have +         * one after iobref_add, so when we unref that we'll free the iobuf as +         * well.  This allows us to pass just the iobref as frame->local. +         */ +        iobuf_unref (iobuf); +        /* Set the pointer to null so we don't free it on a later error. */ +        iobuf = NULL; + +        /* Create the xdr payload */ +        ret = xdr_serialize_generic (iov, req, +                                     (xdrproc_t)xdr_gd1_mgmt_brick_op_req); +        if (ret == -1) { +                goto *errlbl; +        } + +        iov.iov_len = ret; + +        /* Send the msg */ +        ret = rpc_clnt_submit (rpc, &gd_brick_prog, op, +                               my_callback, &iov, 1, NULL, 0, iobref, frame, +                               NULL, 0, NULL, 0, NULL); +        return ret; + +free_iobref: +        iobref_unref (iobref); +maybe_free_iobuf: +        if (iobuf) { +                iobuf_unref (iobuf); +        } +err: +        return -1; +} + +extern size_t +build_volfile_path (char *volume_id, char *path, +                    size_t path_len, char *trusted_str); + + +static int +attach_brick (xlator_t *this, +              glusterd_brickinfo_t *brickinfo, +              glusterd_brickinfo_t *other_brick, +              glusterd_volinfo_t *volinfo, +              glusterd_volinfo_t *other_vol) +{ +        glusterd_conf_t *conf                   = this->private; +        char            pidfile1[PATH_MAX]      = {0}; +        char            pidfile2[PATH_MAX]      = {0}; +        char            unslashed[PATH_MAX]     = {'\0',}; +        char            full_id[PATH_MAX]       = {'\0',}; +        char            path[PATH_MAX]          = {'\0',}; +        int             ret; + +        gf_log (this->name, GF_LOG_INFO, +                "add brick %s to existing process for %s", +                brickinfo->path, other_brick->path); + +        GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, unslashed); + +        ret = pmap_registry_extend (this, other_brick->port, +                                    brickinfo->path); +        if (ret != 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "adding brick to process failed"); +                return -1; +        } + +        brickinfo->port = other_brick->port; +        brickinfo->status = GF_BRICK_STARTED; +        brickinfo->started_here = _gf_true; +        brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); + +        GLUSTERD_GET_BRICK_PIDFILE (pidfile1, other_vol, other_brick, conf); +        GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf); +        (void) sys_unlink (pidfile2); +        (void) sys_link (pidfile1, pidfile2); + +        if (volinfo->is_snap_volume) { +                snprintf (full_id, sizeof(full_id), "/%s/%s/%s.%s.%s", +                          GLUSTERD_VOL_SNAP_DIR_PREFIX, +                          volinfo->snapshot->snapname, +                          volinfo->volname, brickinfo->hostname, unslashed); +        } else { +                snprintf (full_id, sizeof(full_id), "%s.%s.%s", +                          volinfo->volname, brickinfo->hostname, unslashed); +        } +        (void) build_volfile_path (full_id, path, sizeof(path), NULL); + +        int tries = 0; +        while (tries++ <= 10) { +                ret = send_attach_req (this, other_brick->rpc, path, +                                       GLUSTERD_BRICK_ATTACH); +                if (!ret) { +                        return 0; +                } +                /* +                 * It might not actually be safe to manipulate the lock like +                 * this, but if we don't then the connection can never actually +                 * complete and retries are useless.  Unfortunately, all of the +                 * alternatives (e.g. doing all of this in a separate thread) +                 * are much more complicated and risky.  TBD: see if there's a +                 * better way +                 */ +                synclock_unlock (&conf->big_lock); +                sleep (1); +                synclock_lock (&conf->big_lock); +        } + +        gf_log (this->name, GF_LOG_WARNING, +                "attach failed for %s", brickinfo->path); +        return ret; +} + +static glusterd_brickinfo_t * +find_compatible_brick_in_volume (glusterd_conf_t *conf, +                                 glusterd_volinfo_t *volinfo, +                                 glusterd_brickinfo_t *brickinfo) +{ +        xlator_t                *this                   = THIS; +        glusterd_brickinfo_t    *other_brick; +        char                    pidfile2[PATH_MAX]      = {0}; +        int32_t                 pid2                    = -1; + +        cds_list_for_each_entry (other_brick, &volinfo->bricks, +                                 brick_list) { +                if (other_brick == brickinfo) { +                        continue; +                } +                if (!other_brick->started_here) { +                        continue; +                } +                if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) { +                        continue; +                } +                GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, other_brick, +                                            conf); +                if (!gf_is_service_running (pidfile2, &pid2)) { +                        gf_log (this->name, GF_LOG_INFO, +                                "cleaning up dead brick %s:%s", +                                other_brick->hostname, other_brick->path); +                        other_brick->started_here = _gf_false; +                        sys_unlink (pidfile2); +                        continue; +                } +                return other_brick; +        } + +        return NULL; +} + +static gf_boolean_t +unsafe_option (dict_t *this, char *key, data_t *value, void *arg) +{ +        /* +         * Certain options are safe because they're already being handled other +         * ways, such as being copied down to the bricks (all auth options) or +         * being made irrelevant (event-threads).  All others are suspect and +         * must be checked in the next function. +         */ +        if (fnmatch ("*auth*", key, 0) == 0) { +                return _gf_false; +        } + +        if (fnmatch ("*event-threads", key, 0) == 0) { +                return _gf_false; +        } + +        return _gf_true; +} + +static int +opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2) +{ +        data_t  *value2         = dict_get (dict2, key); +        int32_t min_len; + +        /* +         * If the option is only present on one, we can either look at the +         * default or assume a mismatch.  Looking at the default is pretty +         * hard, because that's part of a structure within each translator and +         * there's no dlopen interface to get at it, so we assume a mismatch. +         * If the user really wants them to match (and for their bricks to be +         * multiplexed, they can always reset the option). +         */ +        if (!value2) { +                gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key); +                return -1; +        } + +        min_len = MIN (value1->len, value2->len); +        if (strncmp (value1->data, value2->data, min_len) != 0) { +                gf_log (THIS->name, GF_LOG_DEBUG, +                        "option mismatch, %s, %s != %s", +                        key, value1->data, value2->data); +                return -1; +        } + +        return 0; +} + +static glusterd_brickinfo_t * +find_compatible_brick (glusterd_conf_t *conf, +                       glusterd_volinfo_t *volinfo, +                       glusterd_brickinfo_t *brickinfo, +                       glusterd_volinfo_t **other_vol_p) +{ +        glusterd_brickinfo_t    *other_brick; +        glusterd_volinfo_t      *other_vol; + +        /* Just return NULL here if multiplexing is disabled. */ +        if (!is_brick_mx_enabled ()) { +                return NULL; +        } + +        other_brick = find_compatible_brick_in_volume (conf, volinfo, +                                                       brickinfo); +        if (other_brick) { +                *other_vol_p = volinfo; +                return other_brick; +        } + +        cds_list_for_each_entry (other_vol, &conf->volumes, vol_list) { +                if (other_vol == volinfo) { +                        continue; +                } +                if (volinfo->is_snap_volume) { +                        /* +                         * Snap volumes do have different options than their +                         * parents, but are nonetheless generally compatible. +                         * Skip the option comparison for now, until we figure +                         * out how to handle this (e.g. compare at the brick +                         * level instead of the volume level for this case). +                         * +                         * TBD: figure out compatibility for snap bricks +                         */ +                        goto no_opt_compare; +                } +                /* +                 * It's kind of a shame that we have to do this check in both +                 * directions, but an option might only exist on one of the two +                 * dictionaries and dict_foreach_match will only find that one. +                 */ +                gf_log (THIS->name, GF_LOG_DEBUG, +                        "comparing options for %s and %s", +                        volinfo->volname, other_vol->volname); +                if (dict_foreach_match (volinfo->dict, unsafe_option, NULL, +                                        opts_mismatch, other_vol->dict) < 0) { +                        gf_log (THIS->name, GF_LOG_DEBUG, "failure forward"); +                        continue; +                } +                if (dict_foreach_match (other_vol->dict, unsafe_option, NULL, +                                        opts_mismatch, volinfo->dict) < 0) { +                        gf_log (THIS->name, GF_LOG_DEBUG, "failure backward"); +                        continue; +                } +                gf_log (THIS->name, GF_LOG_DEBUG, "all options match"); +no_opt_compare: +                other_brick = find_compatible_brick_in_volume (conf, +                                                               other_vol, +                                                               brickinfo); +                if (other_brick) { +                        *other_vol_p = other_vol; +                        return other_brick; +                } +        } + +        return NULL; +} +  int  glusterd_brick_start (glusterd_volinfo_t *volinfo,                        glusterd_brickinfo_t *brickinfo,                        gf_boolean_t wait)  { -        int                                     ret   = -1; -        xlator_t                                *this = NULL; +        int                     ret   = -1; +        xlator_t                *this = NULL; +        glusterd_brickinfo_t    *other_brick; +        glusterd_conf_t         *conf = NULL; +        int32_t                 pid                   = -1; +        char                    pidfile[PATH_MAX]     = {0}; +        FILE                    *fp; +        char                    socketpath[PATH_MAX]  = {0}; +        glusterd_volinfo_t      *other_vol;          this = THIS;          GF_ASSERT (this); +        conf = this->private;          if ((!brickinfo) || (!volinfo))                  goto out; @@ -4876,6 +5273,77 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,                  ret = 0;                  goto out;          } + +        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); +        if (gf_is_service_running (pidfile, &pid)) { +                /* +                 * In general, if the pidfile exists and points to a running +                 * process, this will already be set.  However, that's not the +                 * case when we're starting up and bricks are already running. +                 */ +                if (brickinfo->status != GF_BRICK_STARTED) { +                        gf_log (this->name, GF_LOG_INFO, +                                "discovered already-running brick %s", +                                brickinfo->path); +                        //brickinfo->status = GF_BRICK_STARTED; +                        (void) pmap_registry_bind (this, +                                        brickinfo->port, brickinfo->path, +                                        GF_PMAP_PORT_BRICKSERVER, NULL); +                        /* +                         * This will unfortunately result in a separate RPC +                         * connection per brick, even though they're all in +                         * the same process.  It works, but it would be nicer +                         * if we could find a pre-existing connection to that +                         * same port (on another brick) and re-use that. +                         * TBD: re-use RPC connection across bricks +                         */ +                        glusterd_set_brick_socket_filepath (volinfo, brickinfo, +                                        socketpath, sizeof (socketpath)); +                        (void) glusterd_brick_connect (volinfo, brickinfo, +                                        socketpath); +                } +                return 0; +        } + +        ret = _mk_rundir_p (volinfo); +        if (ret) +                goto out; + +        other_brick = find_compatible_brick (conf, volinfo, brickinfo, +                                             &other_vol); +        if (other_brick) { +                ret = attach_brick (this, brickinfo, other_brick, +                                    volinfo, other_vol); +                if (ret == 0) { +                        goto out; +                } +        } + +        /* +         * This hack is necessary because our brick-process management is a +         * total nightmare.  We expect a brick process's socket and pid files +         * to be ready *immediately* after we start it.  Ditto for it calling +         * back to bind its port.  Unfortunately, none of that is realistic. +         * Any process takes non-zero time to start up.  This has *always* been +         * racy and unsafe; it just became more visible with multiplexing. +         * +         * The right fix would be to do all of this setup *in the parent*, +         * which would include (among other things) getting the PID back from +         * the "runner" code.  That's all prohibitively difficult and risky. +         * To work around the more immediate problems, we create a stub pidfile +         * here to let gf_is_service_running know that we expect the process to +         * be there shortly, and then it gets filled in with a real PID when +         * the process does finish starting up. +         * +         * TBD: pray for GlusterD 2 to be ready soon. +         */ +        (void) sys_unlink (pidfile); +        fp = fopen (pidfile, "w+"); +        if (fp) { +                (void) fprintf (fp, "0\n"); +                (void) fclose (fp); +        } +          ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0, @@ -5813,11 +6281,12 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,          if (ret)                  goto out; -          GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);          if (glusterd_is_brick_started (brickinfo)) { -                brick_online = gf_is_service_running (pidfile, &pid); +                if (gf_is_service_running (pidfile, &pid)) { +                        brick_online = _gf_true; +                }          }          memset (key, 0, sizeof (key)); @@ -6880,10 +7349,12 @@ out:          return ret;  } -int -glusterd_brick_statedump (glusterd_volinfo_t *volinfo, -                          glusterd_brickinfo_t *brickinfo, -                          char *options, int option_cnt, char **op_errstr) + +static int +glusterd_brick_signal (glusterd_volinfo_t *volinfo, +                       glusterd_brickinfo_t *brickinfo, +                       char *options, int option_cnt, char **op_errstr, +                       int sig)  {          int                     ret = -1;          xlator_t                *this = NULL; @@ -6916,6 +7387,7 @@ glusterd_brick_statedump (glusterd_volinfo_t *volinfo,          GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf); +        /* TBD: use gf_is_service_running instead of almost-identical code? */          pidfile = fopen (pidfile_path, "r");          if (!pidfile) {                  gf_msg ("glusterd", GF_LOG_ERROR, errno, @@ -6934,24 +7406,35 @@ glusterd_brick_statedump (glusterd_volinfo_t *volinfo,                  goto out;          } -        snprintf (dumpoptions_path, sizeof (dumpoptions_path), -                  DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid); -        ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt); -        if (ret < 0) { -                gf_msg ("glusterd", GF_LOG_ERROR, 0, -                       GD_MSG_BRK_STATEDUMP_FAIL, -                       "error while parsing the statedump " -                        "options"); -                ret = -1; +        if (pid == 0) { +                gf_msg ("glusterd", GF_LOG_WARNING, 0, +                        GD_MSG_NO_SIG_TO_PID_ZERO, +                        "refusing to send signal %d to pid zero", sig);                  goto out;          } +        if (sig == SIGUSR1) { +                snprintf (dumpoptions_path, sizeof (dumpoptions_path), +                          DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", +                          pid); +                ret = glusterd_set_dump_options (dumpoptions_path, options, +                                                 option_cnt); +                if (ret < 0) { +                        gf_msg ("glusterd", GF_LOG_ERROR, 0, +                               GD_MSG_BRK_STATEDUMP_FAIL, +                               "error while parsing the statedump " +                                "options"); +                        ret = -1; +                        goto out; +                } +        } +          gf_msg ("glusterd", GF_LOG_INFO, 0,                  GD_MSG_STATEDUMP_INFO, -                "Performing statedump on brick with pid %d", -                pid); +                "sending signal %d to brick with pid %d", +                sig, pid); -        kill (pid, SIGUSR1); +        kill (pid, sig);          sleep (1);          ret = 0; @@ -6963,6 +7446,26 @@ out:  }  int +glusterd_brick_statedump (glusterd_volinfo_t *volinfo, +                          glusterd_brickinfo_t *brickinfo, +                          char *options, int option_cnt, char **op_errstr) +{ +        return glusterd_brick_signal (volinfo, brickinfo, +                                      options, option_cnt, op_errstr, +                                      SIGUSR1); +} + +int +glusterd_brick_terminate (glusterd_volinfo_t *volinfo, +                          glusterd_brickinfo_t *brickinfo, +                          char *options, int option_cnt, char **op_errstr) +{ +        return glusterd_brick_signal (volinfo, brickinfo, +                                      options, option_cnt, op_errstr, +                                      SIGTERM); +} + +int  glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr)  {          int                     ret = -1; @@ -7446,7 +7949,7 @@ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,                                            "volume=%s", volinfo->volname);                                  goto out;                          } -                        ret = glusterd_rebalance_rpc_create (volinfo, _gf_true); +                        ret = glusterd_rebalance_rpc_create (volinfo);                          break;                  }          case GF_DEFRAG_STATUS_NOT_STARTED: @@ -7978,9 +8481,10 @@ glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload,          glusterd_submit_reply (req, arg, payload, payloadcount, iobref,                                 (xdrproc_t) xdrproc); -        if (dict) -                dict_unref (dict); +        if (dict) { +                dict_unref (dict); +        }          return ret;  } @@ -11356,6 +11860,7 @@ glusterd_get_global_options_for_all_vols (rpcsvc_request_t *req, dict_t *ctx,          char                    *allvolopt = NULL;          int32_t                 i = 0;          gf_boolean_t            exists = _gf_false; +        gf_boolean_t            need_free;          this = THIS;          GF_VALIDATE_OR_GOTO (THIS->name, this, out); @@ -11414,13 +11919,16 @@ glusterd_get_global_options_for_all_vols (rpcsvc_request_t *req, dict_t *ctx,                  ret = dict_get_str (priv->opts, allvolopt, &def_val);                  /* If global option isn't set explicitly */ + +                need_free = _gf_false;                  if (!def_val) { -                        if (!strcmp (allvolopt, GLUSTERD_GLOBAL_OP_VERSION_KEY)) +                        if (!strcmp (allvolopt, +                                     GLUSTERD_GLOBAL_OP_VERSION_KEY)) {                                  gf_asprintf (&def_val, "%d", priv->op_version); -                        else if (!strcmp (allvolopt, GLUSTERD_QUORUM_RATIO_KEY)) -                                gf_asprintf (&def_val, "%d", 0); -                        else if (!strcmp (allvolopt, GLUSTERD_SHARED_STORAGE_KEY)) -                                gf_asprintf (&def_val, "%s", "disable"); +                                need_free = _gf_true; +                        } else { +                                def_val = valid_all_vol_opts[i].dflt_val; +                        }                  }                  count++; @@ -11443,6 +11951,9 @@ glusterd_get_global_options_for_all_vols (rpcsvc_request_t *req, dict_t *ctx,                          goto out;                  } +                if (need_free) { +                        GF_FREE (def_val); +                }                  def_val = NULL;                  allvolopt = NULL; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index e801c1a03a3..a9aefb85246 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -386,6 +386,12 @@ int  glusterd_brick_statedump (glusterd_volinfo_t *volinfo,                            glusterd_brickinfo_t *brickinfo,                            char *options, int option_cnt, char **op_errstr); + +int +glusterd_brick_terminate (glusterd_volinfo_t *volinfo, +                          glusterd_brickinfo_t *brickinfo, +                          char *options, int option_cnt, char **op_errstr); +  int  glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr); diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index f5ddef4755d..957bbfcee25 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1516,6 +1516,8 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,  out:          return ret;  } + +#if 0  static int  brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                          dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -1538,6 +1540,7 @@ brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,  out:          return ret;  } +#endif  static int  brick_graph_add_decompounder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, @@ -2456,7 +2459,11 @@ static volgen_brick_xlator_t server_graph_table[] = {          {brick_graph_add_changetimerecorder, "changetimerecorder"},  #endif          {brick_graph_add_bd, "bd"}, +        /* +         * TBD: Figure out why trash breaks multiplexing.  AFAICT it should fail +         * the same way already.          {brick_graph_add_trash, "trash"}, +         */          {brick_graph_add_arbiter, "arbiter"},          {brick_graph_add_posix, "posix"},  }; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index ecc4f9609c1..ad5fe909578 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2612,7 +2612,7 @@ glusterd_op_start_volume (dict_t *dict, char **op_errstr)          }          ret = dict_get_str (conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); -        if (ret == -1) { +        if (ret != 0) {                  gf_msg (this->name, GF_LOG_INFO, 0,                          GD_MSG_DICT_GET_FAILED, "Global dict not present.");                  ret = 0; @@ -3069,7 +3069,8 @@ glusterd_clearlocks_get_local_client_ports (glusterd_volinfo_t *volinfo,                                    brickinfo->path);                  port = pmap_registry_search (THIS, brickname, -                                             GF_PMAP_PORT_BRICKSERVER); +                                             GF_PMAP_PORT_BRICKSERVER, +                                             _gf_false);                  if (!port) {                          ret = -1;                          gf_msg_debug (THIS->name, 0, "Couldn't get port " diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 7da0de20291..9f877b6d620 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3145,6 +3145,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .flags       = OPT_FLAG_CLIENT_OPT,            .op_version  = GD_OP_VERSION_3_9_1,          }, + +        /* Brick multiplexing options */ +        { .key         = GLUSTERD_BRICK_MULTIPLEX_KEY, +          .voltype     = "mgmt/glusterd", +          .value       = "off", +          .op_version  = GD_OP_VERSION_3_10_0 +        },          { .key         = NULL          }  }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index d00e4e20811..f3c7e1d6891 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -54,6 +54,7 @@                                          "S32gluster_enable_shared_storage.sh"  #define GLUSTER_SHARED_STORAGE          "gluster_shared_storage"  #define GLUSTERD_SHARED_STORAGE_KEY     "cluster.enable-shared-storage" +#define GLUSTERD_BRICK_MULTIPLEX_KEY    "cluster.brick-multiplex"  #define GANESHA_HA_CONF  CONFDIR "/ganesha-ha.conf"  #define GANESHA_EXPORT_DIRECTORY        CONFDIR"/exports" @@ -77,7 +78,6 @@                              "for more details."  #define OPERRSTR_COMMIT_FAIL "Commit failed on %s. Please check the log file "\                               "for more details." -  struct glusterd_volinfo_;  typedef struct glusterd_volinfo_ glusterd_volinfo_t; @@ -215,7 +215,6 @@ struct glusterd_brickinfo {          int                port;          int                rdma_port;          char              *logfile; -        gf_boolean_t       signed_in;          gf_store_handle_t *shandle;          gf_brick_status_t  status;          struct rpc_clnt   *rpc; @@ -232,6 +231,7 @@ struct glusterd_brickinfo {           */          uint16_t           group;          uuid_t             jbr_uuid; +        gf_boolean_t       started_here;  };  typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -1048,7 +1048,8 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,  int  glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options, -                     rpc_clnt_notify_t notify_fn, void *notify_data); +                     rpc_clnt_notify_t notify_fn, void *notify_data, +                     gf_boolean_t force);  /* handler functions */ @@ -1064,8 +1065,7 @@ int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,                                    size_t len, int cmd, defrag_cbk_fn_t cbk,                                    glusterd_op_t op);  int -glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, -                               gf_boolean_t reconnect); +glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo);  int glusterd_rebalance_defrag_init (glusterd_volinfo_t *volinfo,                                      defrag_cbk_fn_t cbk);  | 
