diff options
| author | Susant Palai <spalai@redhat.com> | 2018-01-18 13:06:12 +0530 | 
|---|---|---|
| committer | Raghavendra G <rgowdapp@redhat.com> | 2018-02-02 15:24:38 +0000 | 
| commit | 545a7ce6762a1b3a7b989b43a9d18b5b1b299df0 (patch) | |
| tree | 0f2c3015697553914cb520dbda107f3843521f53 /xlators/cluster/dht/src | |
| parent | d9f773ba719397c12860f494a8cd38109e4b2fe3 (diff) | |
cluster/dht: avoid overwriting client writes during migration
For more details on this issue see
https://github.com/gluster/glusterfs/issues/308
Solution:
This is a restrictive solution where a file will not be migrated
if a client writes to it during the migration. This does not
check if the writes from the rebalance and the client actually
do overlap.
If dht_writev_cbk finds that the file is being migrated (PHASE1)
it will set an xattr on the destination file indicating the file
was updated by a non-rebalance client.
Rebalance checks if any other client has written to the dst file
and aborts the file migration if it finds the xattr.
updates gluster/glusterfs#308
Change-Id: I73aec28bc9dbb8da57c7425ec88c6b6af0fbc9dd
Signed-off-by: Susant Palai <spalai@redhat.com>
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 28 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 103 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 17 | 
4 files changed, 138 insertions, 12 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 91ba3418643..9671bbe1cbe 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -695,6 +695,8 @@ struct dht_conf {          synclock_t      link_lock;          gf_boolean_t    use_fallocate; + +        gf_boolean_t    force_migration;  };  typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 7c596b1c099..226ee95e8b3 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -95,6 +95,33 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          /* Check if the rebalance phase1 is true */          if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { +                if (!dht_is_tier_xlator (this)) { +                        if (!local->xattr_req) { +                                local->xattr_req = dict_new (); +                                if (!local->xattr_req) { +                                        gf_msg (this->name, GF_LOG_ERROR, +                                                DHT_MSG_NO_MEMORY, +                                                ENOMEM, "insufficient memory"); +                                        local->op_errno = ENOMEM; +                                        local->op_ret = -1; +                                        goto out; +                                } +                        } + +                        ret = dict_set_uint32 (local->xattr_req, +                                               GF_PROTECT_FROM_EXTERNAL_WRITES, +                                               1); +                        if (ret) { +                                gf_msg (this->name, GF_LOG_ERROR, +                                        DHT_MSG_DICT_SET_FAILED, 0, +                                        "Failed to set key %s in dictionary", +                                        GF_PROTECT_FROM_EXTERNAL_WRITES); +                                local->op_errno = ENOMEM; +                                local->op_ret = -1; +                                goto out; +                        } +                } +                  dht_iatt_merge (this, &local->stbuf, postbuf, NULL);                  dht_iatt_merge (this, &local->prebuf, prebuf, NULL); @@ -146,7 +173,6 @@ dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)                  return 0;          } -          if (subvol == NULL)                  goto out; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 70d5a5f316f..d9c3149049c 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -26,7 +26,8 @@  #define MAX_REBAL_TYPE_SIZE               16  #define FILE_CNT_INTERVAL                600 /* 10 mins */  #define ESTIMATE_START_INTERVAL          600 /* 10 mins */ - +#define HARDLINK_MIG_INPROGRESS          -2 +#define SKIP_MIGRATION_FD_POSITIVE       -3  #ifndef MAX  #define MAX(a, b) (((a) > (b))?(a):(b))  #endif @@ -680,6 +681,7 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,          struct iatt  check_stbuf= {0,};          dht_conf_t  *conf = NULL;          dict_t      *dict = NULL; +        dict_t      *xdata = NULL;          conf = this->private; @@ -725,7 +727,31 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,                  goto out;          } -        ret = syncop_lookup (to, loc, &new_stbuf, NULL, NULL, NULL); +        if (!!dht_is_tier_xlator (this)) { +                xdata = dict_new (); +                if (!xdata) { +                        *fop_errno = ENOMEM; +                        ret = -1; +                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM, +                                DHT_MSG_MIGRATE_FILE_FAILED, +                                "%s: dict_new failed)", +                                loc->path); +                        goto out; +                } + +                ret = dict_set_int32 (xdata, GF_CLEAN_WRITE_PROTECTION, 1); +                if (ret) { +                        *fop_errno = ENOMEM; +                        ret = -1; +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                DHT_MSG_DICT_SET_FAILED, +                                "%s: failed to set dictionary value: key = %s ", +                                loc->path, GF_CLEAN_WRITE_PROTECTION); +                        goto out; +                } +        } + +        ret = syncop_lookup (to, loc, &new_stbuf, NULL, xdata, NULL);          if (!ret) {                  /* File exits in the destination, check if gfid matches */                  if (gf_uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { @@ -875,6 +901,10 @@ out:          if (dict)                  dict_unref (dict); +        if (xdata) +                dict_unref (dict); + +          return ret;  } @@ -1090,9 +1120,9 @@ out:  }  static int -__dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, -                              xlator_t *to, fd_t *src, fd_t *dst, -                              uint64_t ia_size, int hole_exists, +__dht_rebalance_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, +                              xlator_t *from, xlator_t *to, fd_t *src, +                              fd_t *dst, uint64_t ia_size, int hole_exists,                                int *fop_errno)  {          int            ret    = 0; @@ -1102,7 +1132,10 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from,          struct iobref *iobref = NULL;          uint64_t       total  = 0;          size_t         read_size = 0; +        dict_t        *xdata = NULL; +        dht_conf_t    *conf  = NULL; +        conf = this->private;          /* if file size is '0', no need to enter this loop */          while (total < ia_size) {                  read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? @@ -1121,8 +1154,42 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from,                                                      ret, offset, iobref,                                                      fop_errno);                  } else { +                        if (!conf->force_migration && +                            !dht_is_tier_xlator (this)) { +                                xdata = dict_new (); +                                if (!xdata) { +                                        gf_msg ("dht", GF_LOG_ERROR, 0, +                                                DHT_MSG_MIGRATE_FILE_FAILED, +                                                "insufficient memory"); +                                        ret = -1; +                                        *fop_errno = ENOMEM; +                                        break; +                                } + +                                /* Fail this write and abort rebalance if we +                                 * detect a write from client since migration of +                                 * this file started. This is done to avoid +                                 * potential data corruption due to out of order +                                 * writes from rebalance and client to the same +                                 * region (as compared between src and dst +                                 * files). See +                                 * https://github.com/gluster/glusterfs/issues/308 +                                 * for more details. +                                 */ +                                ret = dict_set_int32 (xdata, +                                                      GF_AVOID_OVERWRITE, 1); +                                if (ret) { +                                        gf_msg ("dht", GF_LOG_ERROR, 0, +                                                ENOMEM, "failed to set dict"); +                                        ret = -1; +                                        *fop_errno = ENOMEM; +                                        break; +                                } + +                        } +                          ret = syncop_writev (to, dst, vector, count, -                                             offset, iobref, 0, NULL, NULL); +                                             offset, iobref, 0, xdata, NULL);                          if (ret < 0) {                                  *fop_errno = -ret;                          } @@ -1158,6 +1225,10 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from,          else                  ret = -1; +        if (xdata) { +                dict_unref (xdata); +        } +          return ret;  } @@ -1575,7 +1646,6 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                  goto out;          } -          /* Do not migrate file in case lock migration is not enabled on the           * volume*/          if (!conf->lock_migration_enabled) { @@ -1642,7 +1712,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag, conf,                                      fop_errno);          if (ret) { -                if (ret == -2) +                if (ret == HARDLINK_MIG_INPROGRESS)                          ret = 0;                  goto out;          } @@ -1785,7 +1855,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          ret = __check_file_has_hardlink (this, loc, &stbuf, xattr_rsp,                                           flag, defrag, conf, fop_errno);          if (ret) { -                if (ret == -2) +                if (ret == HARDLINK_MIG_INPROGRESS)                          ret = 0;                  goto out;          } @@ -1794,8 +1864,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                  file_has_holes = 1; -        ret = __dht_rebalance_migrate_data (defrag, from, to, src_fd, dst_fd, -                                            stbuf.ia_size, +        ret = __dht_rebalance_migrate_data (this, defrag, from, to, +                                            src_fd, dst_fd, stbuf.ia_size,                                              file_has_holes, fop_errno);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2280,6 +2350,17 @@ out:                  }          } +        if (!dht_is_tier_xlator (this)) { +                lk_ret = syncop_removexattr (to, loc, +                                             GF_PROTECT_FROM_EXTERNAL_WRITES, +                                             NULL, NULL); +                if (lk_ret) { +                        gf_msg (this->name, GF_LOG_WARNING, -lk_ret, 0, +                                "%s: removexattr failed key %s", loc->path, +                                GF_CLEAN_WRITE_PROTECTION); +                } +        } +          if (dict)                  dict_unref (dict); diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 8c011f72530..51c9d9cb3cf 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -524,6 +524,10 @@ dht_reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled,                            options, bool, out); +        GF_OPTION_RECONF ("force-migration", conf->force_migration, +                          options, bool, out); + +          if (conf->defrag) {                  if (dict_get_str (options, "rebal-throttle", &temp_str) == 0) {                          ret = dht_configure_throttle (this, conf, temp_str); @@ -810,6 +814,10 @@ dht_init (xlator_t *this)          GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled,                           bool, err); +        GF_OPTION_INIT ("force-migration", conf->force_migration, +                        bool, err); + +          if (defrag) {                defrag->lock_migration_enabled = conf->lock_migration_enabled; @@ -1203,5 +1211,14 @@ struct volume_options options[] = {            .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC          }, +        { .key =  {"force-migration"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +          .description = "If disabled, rebalance will not migrate files that " +                         "are being written to by an application", +          .op_version  = {GD_OP_VERSION_4_0_0}, +          .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC +        }, +          { .key  = {NULL} },  };  | 
