From 60b6e5d2c3442ea0f7f85374d6613cd0dd76604c Mon Sep 17 00:00:00 2001 From: Raghavendra Bhat Date: Wed, 27 May 2015 17:00:36 +0530 Subject: features/bit-rot: check for both inmemory and ondisk staleness * Let bit-rot stub check both on disk ongoing version, signed version xattrs and the in memory flags in the inode and then decide whether the inode is stale or not. This information is used by one shot crawler in BitD to decide whether to trigger the sign for the object or skip it. NOTE: The above check should be done only for BitD. For scrubber its still the old way of comparing on disk ongoing version with signed version. * BitD's one shot crawler should not sign zero byte objects if they do not contain signature. (Means the object was just created and nothing was written to it). Change-Id: I6941aefc2981bf79a6aeb476e660f79908e165a8 BUG: 1224611 Signed-off-by: Raghavendra Bhat Reviewed-on: http://review.gluster.org/10947 Reviewed-by: Venky Shankar Tested-by: Venky Shankar Tested-by: Gluster Build System --- xlators/features/bit-rot/src/bitd/bit-rot.c | 14 +-- xlators/features/bit-rot/src/stub/bit-rot-stub.c | 143 +++++++++++++++++++++-- 2 files changed, 138 insertions(+), 19 deletions(-) (limited to 'xlators/features/bit-rot') diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 651c42fcb82..03fd1c2f1ab 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -853,7 +853,6 @@ br_check_object_need_sign (xlator_t *this, dict_t *xattr, br_child_t *child) { int32_t ret = -1; gf_boolean_t need_sign = _gf_false; - struct timeval tv = {0,}; br_isignature_out_t *sign = NULL; GF_VALIDATE_OR_GOTO ("bit-rot", this, out); @@ -868,11 +867,8 @@ br_check_object_need_sign (xlator_t *this, dict_t *xattr, br_child_t *child) goto out; } - tv.tv_sec = ntohl (sign->time[0]); - tv.tv_usec = ntohl (sign->time[1]); - /* Object has been opened and hence dirty. Do not sign it */ - if (sign->stale && !br_time_equal (child, &tv)) + if (sign->stale) need_sign = _gf_true; out: @@ -1002,7 +998,11 @@ bitd_oneshot_crawl (xlator_t *subvol, op_errno = -ret; br_log_object (this, "getxattr", linked_inode->gfid, op_errno); - if (op_errno == ENODATA) + /** + * No need to sign the zero byte objects as the signing + * happens upon first modification of the object. + */ + if (op_errno == ENODATA && (iatt.ia_size != 0)) need_signing = _gf_true; if (op_errno == EINVAL) gf_log (this->name, GF_LOG_WARNING, "Partial version " @@ -1231,7 +1231,7 @@ br_brick_connect (xlator_t *this, br_child_t *child) memcpy (child->brick_path, stub->export, strlen (stub->export) + 1); child->tv.tv_sec = ntohl (stub->timebuf[0]); - child->tv.tv_usec = ntohl (stub->timebuf[0]); + child->tv.tv_usec = ntohl (stub->timebuf[1]); if (priv->iamscrubber) ret = br_enact_scrubber (this, child); diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index d48a3f751f3..2f2e16df226 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -944,6 +944,79 @@ br_stub_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } +/** + * ONE SHOT CRAWLER from BitD signs the objects that it encounters while + * crawling, if the object is identified as stale by the stub. Stub follows + * the below logic to mark an object as stale or not. + * If the ongoing version and the signed_version match, then the object is not + * stale. Just return. Otherwise if they does not match, then it means one + * of the below things. + * 1) If the inode does not need write back of the version and the sign state is + * is NORMAL, then some active i/o is going on the object. So skip it. + * A notification will be sent to trigger the sign once the release is + * received on the object. + * 2) If inode does not need writeback of the version and the sign state is + * either reopen wait or quick sign, then it means: + * A) BitD restarted and it is not sure whether the object it encountered + * while crawling is in its timer wheel or not. Since there is no way to + * scan the timer wheel as of now, ONE SHOT CRAWLER just goes ahead and + * signs the object. Since the inode does not need writeback, version will + * not be incremented and directly the object will be signed. + * 3) If the inode needs writeback, then it means the inode was forgotten after + * the versioning and it has to be signed now. + * + * This is the algorithm followed: + * if (ongoing_version == signed_version); then + * object_is_not_stale; + * return; + * else; then + * if (!inode_needs_writeback && inode_sign_state != NORMAL); then + * object_is_stale; + * if (inode_needs_writeback); then + * object_is_stale; + * + * For SCRUBBER, no need to check for the sign state and inode writeback. + * If the ondisk ongoingversion and the ondisk signed version does not match, + * then treat the object as stale. + */ +char +br_stub_is_object_stale (xlator_t *this, call_frame_t *frame, inode_t *inode, + br_version_t *obuf, br_signature_t *sbuf) +{ + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + char stale = 0; + + if (obuf->ongoingversion == sbuf->signedversion) + goto out; + + if (frame->root->pid == GF_CLIENT_PID_SCRUB) { + stale = 1; + goto out; + } + + ret = br_stub_get_inode_ctx (this, inode, &ctx_addr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " + "context for %s", uuid_utoa (inode->gfid)); + goto out; + } + + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + + LOCK (&inode->lock); + { + if ((!__br_stub_is_inode_dirty (ctx) && + ctx->info_sign != BR_SIGN_NORMAL) || + __br_stub_is_inode_dirty (ctx)) + stale = 1; + } + UNLOCK (&inode->lock); + +out: + return stale; +} int br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -956,12 +1029,18 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, br_signature_t *sbuf = NULL; br_isignature_out_t *sign = NULL; br_vxattr_status_t status; + br_stub_local_t *local = NULL; + inode_t *inode = NULL; if (op_ret < 0) goto unwind; if (cookie != (void *) BR_STUB_REQUEST_COOKIE) goto unwind; + local = frame->local; + frame->local = NULL; + inode = local->u.context.inode; + op_ret = -1; status = br_version_xattr_state (xattr, &obuf, &sbuf); @@ -1000,7 +1079,7 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* Object's dirty state & current signed version */ sign->version = sbuf->signedversion; - sign->stale = (obuf->ongoingversion != sbuf->signedversion) ? 1 : 0; + sign->stale = br_stub_is_object_stale (this, frame, inode, obuf, sbuf); /* Object's signature */ sign->signaturelen = signaturelen; @@ -1020,6 +1099,10 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unwind: STACK_UNWIND (frame, op_ret, op_errno, xattr, xdata); + if (local) { + br_stub_cleanup_local (local); + br_stub_dealloc_local (local); + } return 0; } @@ -1065,9 +1148,16 @@ int br_stub_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - void *cookie = NULL; - uuid_t rootgfid = {0, }; - fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk; + void *cookie = NULL; + uuid_t rootgfid = {0, }; + fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + br_stub_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, loc, unwind); + GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind); rootgfid[15] = 1; @@ -1076,10 +1166,8 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this, goto wind; } - if (br_stub_is_internal_xattr (name)) { - STACK_UNWIND (frame, -1, EINVAL, NULL, NULL); - return 0; - } + if (br_stub_is_internal_xattr (name)) + goto unwind; /** * this special extended attribute is allowed only on root @@ -1099,6 +1187,18 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this, if (name && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { cookie = (void *) BR_STUB_REQUEST_COOKIE; + + local = br_stub_alloc_local (this); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + br_stub_fill_local (local, NULL, NULL, loc->inode, + loc->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + frame->local = local; } wind: @@ -1106,6 +1206,9 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this, (frame, cbk, cookie, FIRST_CHILD (this), FIRST_CHILD (this)->fops->getxattr, loc, name, xdata); return 0; +unwind: + STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL); + return 0; } int @@ -1115,6 +1218,9 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this, void *cookie = NULL; uuid_t rootgfid = {0, }; fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + br_stub_local_t *local = NULL; rootgfid[15] = 1; @@ -1123,10 +1229,8 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this, goto wind; } - if (br_stub_is_internal_xattr (name)) { - STACK_UNWIND (frame, -1, EINVAL, NULL, NULL); - return 0; - } + if (br_stub_is_internal_xattr (name)) + goto unwind; /** * this special extended attribute is allowed only on root @@ -1145,6 +1249,18 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this, if (name && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { cookie = (void *) BR_STUB_REQUEST_COOKIE; + + local = br_stub_alloc_local (this); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + br_stub_fill_local (local, NULL, fd, fd->inode, + fd->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + frame->local = local; } wind: @@ -1152,6 +1268,9 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this, (frame, cbk, cookie, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata); return 0; +unwind: + STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL); + return 0; } /** -- cgit