diff options
| author | Venky Shankar <vshankar@redhat.com> | 2015-03-31 15:48:18 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2015-04-08 07:01:32 +0000 | 
| commit | f0cd1d73c63001740cd7691a77df7631c9b8e8dc (patch) | |
| tree | 336a8ee694dc71a2fde6927d8a40682bef6368ef /xlators/features/bit-rot | |
| parent | 7fb55dbdabf73f9169b0f3021a42fa120d64b373 (diff) | |
bitrot/scrub: Scrubber fixes
This patch fixes a handful of problem with scrubber which
are detailed below.
Scrubber used to skip objects for verification due to missing
fd iterface to fetch versioning extended attributes. Similar
to the inode interface, an fd based interface in POSIX is now
introduced.
Moreover, this patch also fixes potential false reporting by
scrubber due to:
  An object gets dirtied and signed when scrubber is busy
  calculatingobject checksum. This is fixed by caching the
  signed version when an object is first inspected for
  stalenes, i.e., during pre-compute stage. This version is
  used to verify checksum in the post-compute stage when the
  signatures are compared for possible corruption.
  Side effect of _not_ sending signature length during signing
  resulted in "truncated" signature to be set for an object.
  Now, at the time of signing, the signature length is sent
  and is used in place of invoking strlen() to get signature
  length (which could have possible 00s). The signature length
  itself is not persisted in the signature xattr, but is
  calculated on-the-fly by substracting the xattr length by
  the "structure" header size.
Some of the log entries are made more meaningful (as and aid
for debugging).
Change-Id: I938bee5aea6688d5d99eb2640053613af86d6269
BUG: 1207624
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/10118
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/features/bit-rot')
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 247 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 13 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-common.h | 6 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.c | 27 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.h | 1 | 
5 files changed, 212 insertions, 82 deletions
| diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 1712aa1529b..d2b179027a2 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -24,23 +24,26 @@  #include "bit-rot-scrub.h"  #include <pthread.h> +/** + * fetch signature extended attribute from an object's fd. + * NOTE: On success @xattr is not unref'd as @sign points + * to the dictionary value. + */  static inline int32_t -bitd_fetch_signature (xlator_t *this, -                      br_child_t *child, fd_t *fd, br_isignature_out_t *sign) +bitd_fetch_signature (xlator_t *this, br_child_t *child, +                      fd_t *fd, dict_t **xattr, br_isignature_out_t **sign)  { -        int32_t ret = -1; -        dict_t *xattr = NULL; -        br_isignature_out_t *sigptr = NULL; +       int32_t ret = -1; -        ret = syncop_fgetxattr (child->xl, fd, &xattr, +        ret = syncop_fgetxattr (child->xl, fd, xattr,                                 GLUSTERFS_GET_OBJECT_SIGNATURE, NULL);          if (ret < 0) { -                br_log_object (this, "getxattr", fd->inode->gfid, -ret); +                br_log_object (this, "fgetxattr", fd->inode->gfid, -ret);                  goto out;          } -        ret = dict_get_ptr (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, -                            (void **)&sigptr); +        ret = dict_get_ptr +                (*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **) sign);          if (ret) {                  gf_log (this->name, GF_LOG_ERROR,                          "failed to extract signature info [GFID: %s]", @@ -48,67 +51,141 @@ bitd_fetch_signature (xlator_t *this,                  goto unref_dict;          } -        ret = 0; -        (void) memcpy (sign, sigptr, sizeof (br_isignature_out_t)); +        return 0;   unref_dict: -        dict_unref (xattr); +        dict_unref (*xattr);   out: -        return ret; +        return -1;  } -static inline int32_t +/** + * POST COMPUTE CHECK + * + * Checks to be performed before verifying calculated signature + * Object is skipped if: + *  - has stale signature + *  - mismatches versions caches in pre-compute check + */ + +int32_t  bitd_scrub_post_compute_check (xlator_t *this,                                 br_child_t *child, -                               br_isignature_out_t *sign, fd_t *fd) +                               fd_t *fd, unsigned long version, +                               br_isignature_out_t **signature)  { -        int32_t ret = 0; +        int32_t              ret     = 0; +        size_t               signlen = 0; +        dict_t              *xattr   = NULL; +        br_isignature_out_t *signptr = NULL; -        ret = bitd_fetch_signature (this, child, fd, sign); -        if (ret) +        ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); +        if (ret < 0)                  goto out; -        if (sign->stale) + +        /** +         * Either the object got dirtied during the time the signature was +         * calculated OR the version we saved during pre-compute check does +         * not match now, implying that the object got dirtied and signed in +         * between scrubs pre & post compute checks (checksum window). +         * +         * The log entry looks pretty ugly, but helps in debugging.. +         */ +        if (signptr->stale || (signptr->version != version)) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "<STAGE: POST> Object [GFID: %s] either has a stale " +                        "signature OR underwent signing during checksumming " +                        "{Stale: %d | Version: %lu,%lu}", +                        uuid_utoa (fd->inode->gfid), (signptr->stale) ? 1 : 0, +                        version, signptr->version);                  ret = -1; +                goto unref_dict; +        } + +        signlen = signptr->signaturelen; +        *signature = GF_CALLOC (1, sizeof (br_isignature_out_t) + signlen, +                                gf_common_mt_char); +        (void) memcpy (*signature, signptr, +                       sizeof (br_isignature_out_t) + signlen); + + unref_dict: +        dict_unref (xattr);   out:          return ret;  }  static inline int32_t -bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, fd_t *fd) +bitd_signature_staleness (xlator_t *this, +                          br_child_t *child, fd_t *fd, +                          int *stale, unsigned long *version)  {          int32_t ret = -1; -        br_isignature_out_t sign = {0,}; +        dict_t *xattr = NULL; +        br_isignature_out_t *signptr = NULL; -        /* if the object is already marked bad, don't bother checking */ -        if (bitd_is_bad_file (this, child, NULL, fd)) +        ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); +        if (ret < 0)                  goto out; -        /* else, check for signature staleness */ -        ret = bitd_fetch_signature (this, child, fd, &sign); -        if (ret) -                goto out; -        if (sign.stale) { -                ret = -1; +        /** +         * save verison for validation in post compute stage +         * c.f. bitd_scrub_post_compute_check() +         */ +        *stale = signptr->stale ? 1 : 0; +        *version = signptr->version; + +        dict_unref (xattr); + + out: +        return ret; +} + +/** + * PRE COMPUTE CHECK + * + * Checks to be performed before initiating object signature calculation. + * An object is skipped if: + *  - it's already marked corrupted + *  - has stale signature + */ +int32_t +bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, +                              fd_t *fd, unsigned long *version) +{ +        int     stale = 0; +        int32_t ret   = -1; + +        if (bitd_is_bad_file (this, child, NULL, fd)) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Object [GFID: %s] is marked corrupted, skipping..", +                        uuid_utoa (fd->inode->gfid));                  goto out;          } -        ret = 0; +        ret = bitd_signature_staleness (this, child, fd, &stale, version); +        if (!ret && stale) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "<STAGE: PRE> Object [GFID: %s] has stale signature", +                        uuid_utoa (fd->inode->gfid)); +                ret = -1; +        }   out:          return ret;  } -static inline int +/* static inline int */ +int  bitd_compare_ckum (xlator_t *this,                     br_isignature_out_t *sign,                     unsigned char *md, inode_t *linked_inode,                     gf_dirent_t *entry, fd_t *fd, br_child_t *child)  {          int   ret = -1; -        dict_t xattr = {0,}; +        dict_t *xattr = NULL;          GF_VALIDATE_OR_GOTO ("bit-rot", this, out);          GF_VALIDATE_OR_GOTO (this->name, sign, out); @@ -118,52 +195,70 @@ bitd_compare_ckum (xlator_t *this,          GF_VALIDATE_OR_GOTO (this->name, md, out);          GF_VALIDATE_OR_GOTO (this->name, entry, out); -        if (strncmp (sign->signature, (char *)md, strlen (sign->signature))) { -                gf_log (this->name, GF_LOG_WARNING, "checksums does not match " -                        "for the entry %s (gfid: %s)", entry->d_name, -                        uuid_utoa (linked_inode->gfid)); -                ret = dict_set_int32 (&xattr, "trusted.glusterfs.bad-file", -                                      _gf_true); -                if (ret) { -                        gf_log (this->name, GF_LOG_ERROR, "dict-set for " -                                "bad-file (entry: %s, gfid: %s) failed", -                                entry->d_name, uuid_utoa (linked_inode->gfid)); -                        goto out; -                } - -                ret = syncop_fsetxattr (child->xl, fd, &xattr, 0); -                if (ret) { -                        gf_log (this->name, GF_LOG_ERROR, "setxattr to mark " -                                "the file %s (gfid: %s) as bad failed", -                                entry->d_name, uuid_utoa (linked_inode->gfid)); -                        goto out; -                } +        if (strncmp +            (sign->signature, (char *) md, strlen (sign->signature)) == 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "Entry %s [GFID: %s] matches calculated checksum", +                        entry->d_name, uuid_utoa (linked_inode->gfid)); +                return 0;          } -out: +        gf_log (this->name, GF_LOG_WARNING, +                "Object checksumsum mismatch: %s [GFID: %s]", +                entry->d_name, uuid_utoa (linked_inode->gfid)); + +        /* Perform bad-file marking */ +        xattr = dict_new (); +        if (!xattr) { +                ret = -1; +                goto out; +        } + +        ret = dict_set_int32 (xattr, "trusted.glusterfs.bad-file", _gf_true); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Error setting bad-file marker for %s [GFID: %s]", +                        entry->d_name, uuid_utoa (linked_inode->gfid)); +                goto dictfree; +        } + +        gf_log (this->name, GF_LOG_INFO, "Marking %s [GFID: %s] as corrupted..", +                entry->d_name, uuid_utoa (linked_inode->gfid)); +        ret = syncop_fsetxattr (child->xl, fd, xattr, 0); +        if (ret) +                gf_log (this->name, GF_LOG_ERROR, +                        "Error marking object %s [GFID: %s] as corrupted", +                        entry->d_name, uuid_utoa (linked_inode->gfid)); + + dictfree: +        dict_unref (xattr); + out:          return ret;  }  /** - * This is the scrubber. As of now there's a mix of fd and inode - * operations. Better to move them to fd based to be clean and - * avoid code cluttering. + * "The Scrubber" + * + * Perform signature validation for a given object with the assumption + * that the signature is SHA256 (because signer as of now _always_ + * signs with SHA256).   */  int  bitd_start_scrub (xlator_t *subvol,                    gf_dirent_t *entry, loc_t *parent, void *data)  { -        int32_t              ret          = -1; -        fd_t                *fd           = NULL; -        loc_t                loc          = {0, }; -        struct iatt          iatt         = {0, }; -        struct iatt          parent_buf   = {0, }; -        pid_t                pid          = 0; -        br_child_t          *child        = NULL; -        xlator_t            *this         = NULL; -        unsigned char       *md           = NULL; -        inode_t             *linked_inode = NULL; -        br_isignature_out_t  sign         = {0,}; +        int32_t              ret           = -1; +        fd_t                *fd            = NULL; +        loc_t                loc           = {0, }; +        struct iatt          iatt          = {0, }; +        struct iatt          parent_buf    = {0, }; +        pid_t                pid           = 0; +        br_child_t          *child         = NULL; +        xlator_t            *this          = NULL; +        unsigned char       *md            = NULL; +        inode_t             *linked_inode  = NULL; +        br_isignature_out_t *sign          = NULL; +        unsigned long        signedversion = 0;          GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);          GF_VALIDATE_OR_GOTO ("bit-rot", data, out); @@ -189,6 +284,9 @@ bitd_start_scrub (xlator_t *subvol,          if (linked_inode)                  inode_lookup (linked_inode); +        gf_log (this->name, GF_LOG_DEBUG, "Scrubbing object %s [GFID: %s]", +                entry->d_name, uuid_utoa (linked_inode->gfid)); +          if (iatt.ia_type != IA_IFREG) {                  gf_log (this->name, GF_LOG_DEBUG, "%s is not a regular "                          "file", entry->d_name); @@ -221,7 +319,7 @@ bitd_start_scrub (xlator_t *subvol,           *  - presence of bad object           *  - signature staleness           */ -        ret = bitd_scrub_pre_compute_check (this, child, fd); +        ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion);          if (ret)                  goto unrefd; /* skip this object */ @@ -243,13 +341,16 @@ bitd_start_scrub (xlator_t *subvol,           * perform post compute checks as an object's signature may have           * become stale while scrubber calculated checksum.           */ -        ret = bitd_scrub_post_compute_check (this, child, &sign, fd); +        ret = bitd_scrub_post_compute_check (this, child, +                                             fd, signedversion, &sign);          if (ret)                  goto free_md; -        ret = bitd_compare_ckum (this, &sign, md, +        ret = bitd_compare_ckum (this, sign, md,                                   linked_inode, entry, fd, child); +        GF_FREE (sign); /* alloced on post-compute */ +          /** fd_unref() takes care of closing fd.. like syncop_close() */   free_md: @@ -263,8 +364,8 @@ bitd_start_scrub (xlator_t *subvol,          return ret;  } -#define BR_SCRUB_THROTTLE_COUNT 10 -#define BR_SCRUB_THROTTLE_ZZZ   100 +#define BR_SCRUB_THROTTLE_COUNT 30 +#define BR_SCRUB_THROTTLE_ZZZ   60  void *  br_scrubber (void *arg)  { @@ -284,7 +385,7 @@ br_scrubber (void *arg)                              GF_CLIENT_PID_SCRUB, child, bitd_start_scrub,                              BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ); -                sleep (BR_SCRUB_THROTTLE_ZZZ * BR_SCRUB_THROTTLE_COUNT); +                sleep (BR_SCRUB_THROTTLE_ZZZ);          }          return NULL; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index d808c0dc69c..6a4de70bfbb 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -139,8 +139,14 @@ br_prepare_signature (const unsigned char *sign,          if (!signature)                  return NULL; +        /* object version */          signature->signedversion = object->signedversion; + +        /* signature length & type */ +        signature->signaturelen = hashlen;          signature->signaturetype = hashtype; + +        /* signature itself */          memcpy (signature->signature, (char *)sign, hashlen);          signature->signature[hashlen+1] = '\0'; @@ -167,7 +173,7 @@ bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd)                                         "trusted.glusterfs.bad-file", NULL);          if (!ret) { -                gf_log (this->name, GF_LOG_ERROR, "[GFID: %s] is marked " +                gf_log (this->name, GF_LOG_DEBUG, "[GFID: %s] is marked "                          "corrupted", uuid_utoa (inode->gfid));                  bad_file = _gf_true;          } @@ -918,8 +924,11 @@ bitd_oneshot_crawl (xlator_t *subvol,           * if there are any fds present for that inode) and handle properly.           */ -        if (bitd_is_bad_file (this, child, &loc, NULL)) +        if (bitd_is_bad_file (this, child, &loc, NULL)) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Entry [%s] is marked corrupted.. skipping.", loc.path);                  goto unref_inode; +        }          ret = syncop_getxattr (child->xl, &loc, &xattr,                                 GLUSTERFS_GET_OBJECT_SIGNATURE, NULL); diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h index fdf23142768..699323170d3 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-common.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h @@ -76,6 +76,7 @@ typedef struct br_isignature_in {          unsigned long signedversion;     /* version against which the                                              object was signed         */ +        size_t signaturelen;             /* signature length          */          char signature[0];               /* object signature          */  } br_isignature_t; @@ -86,11 +87,14 @@ typedef struct br_isignature_in {  typedef struct br_isignature_out {          char stale;                      /* stale signature?          */ +        unsigned long version;           /* current signed version    */ +          uint32_t time[2];                /* time when the object                                              got dirtied               */          int8_t signaturetype;            /* hash type                 */ -        char signature[0];               /* signature (hash)          */ +        size_t signaturelen;             /* signature length          */ +        char   signature[0];             /* signature (hash)          */  } br_isignature_out_t;  typedef struct br_stub_init { diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index f8c8f59c1f8..0db500659b5 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -566,7 +566,7 @@ br_stub_prepare_signature (xlator_t *this, dict_t *dict,          if (!br_is_signature_type_valid (sign->signaturetype))                  goto error_return; -        signaturelen = strlen (sign->signature); +        signaturelen = sign->signaturelen;          ret = br_stub_alloc_versions (NULL, &sbuf, signaturelen);          if (ret)                  goto error_return; @@ -657,8 +657,8 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                        int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)  {          int32_t              ret          = 0; -        ssize_t              totallen     = 0; -        ssize_t              signaturelen = 0; +        size_t               totallen     = 0; +        size_t               signaturelen = 0;          br_version_t        *obuf         = NULL;          br_signature_t      *sbuf         = NULL;          br_isignature_out_t *sign         = NULL; @@ -681,8 +681,21 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,              || (status == BR_VXATTR_STATUS_UNSIGNED))                  goto delkeys; -        signaturelen = strlen (sbuf->signature); -        totallen = signaturelen + sizeof (br_isignature_out_t); +        /** +         * okay.. we have enough information to satisfy the request, +         * namely: version and signing extended attribute. what's +         * pending is the signature length -- that's figured out +         * indirectly via the size of the _whole_ xattr and the +         * on-disk signing xattr header size. +         */ +        op_errno = EINVAL; +        ret = dict_get_uint32 (xattr, BITROT_SIGNING_XATTR_SIZE_KEY, +                               (uint32_t *)&signaturelen); +        if (ret) +                goto delkeys; + +        signaturelen -= sizeof (br_signature_t); +        totallen = sizeof (br_isignature_out_t) + signaturelen;          op_errno = ENOMEM;          sign = GF_CALLOC (1, totallen, gf_br_stub_mt_signature_t); @@ -692,10 +705,12 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          sign->time[0] = obuf->timebuf[0];          sign->time[1] = obuf->timebuf[1]; -        /* Object's dirty state */ +        /* Object's dirty state & current signed version */ +        sign->version = sbuf->signedversion;          sign->stale = (obuf->ongoingversion != sbuf->signedversion) ? 1 : 0;          /* Object's signature */ +        sign->signaturelen  = signaturelen;          sign->signaturetype = sbuf->signaturetype;          (void) memcpy (sign->signature, sbuf->signature, signaturelen); diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index d565112b1ad..5db0e321c20 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -263,6 +263,7 @@ br_stub_remove_vxattrs (dict_t *xattr)          if (xattr) {                  dict_del (xattr, BITROT_CURRENT_VERSION_KEY);                  dict_del (xattr, BITROT_SIGNING_VERSION_KEY); +                dict_del (xattr, BITROT_SIGNING_XATTR_SIZE_KEY);          }  } | 
