From 097795df7ebab48515d75e95a2c29e5ab42ac5ee Mon Sep 17 00:00:00 2001 From: Joseph Fernandes Date: Wed, 1 Apr 2015 02:56:23 +0530 Subject: ctr : Fix for heating of files during promotion/demotion This fix will solve the heating of the files during the promotion or demotion. Promotion: ~~~~~~~~~ When a file gets promoted it get the current time stamp during creation only, but following writes or reads during the migration wont heat the file. Demotion: ~~~~~~~~ When a file gets demoted it get the wind/unwind time stamp is set to zero. The following writes or reads during the migration wont heat the file. What is remaining ? ~~~~~~~~~~~~~~~~~ Bug 1209129 ( https://bugzilla.redhat.com/show_bug.cgi?id=1209129 ) Inspite of this fix there is still a issue remaining, i.e the heat of the file is not keep intact during a internal rebalance activity i.e a rebalance within a tier. Change-Id: I01e82dc226355599732d40e699062cee7960b0a5 BUG: 1207867 Signed-off-by: Joseph Fernandes Signed-off-by: Dan Lambright Signed-off-by: Joseph Fernandes Reviewed-on: http://review.gluster.org/10080 Reviewed-by: Shyamsundar Ranganathan Tested-by: Gluster Build System --- .../changetimerecorder/src/changetimerecorder.c | 34 ++++++++-- .../features/changetimerecorder/src/ctr-helper.c | 61 ++++++++++++----- .../features/changetimerecorder/src/ctr-helper.h | 76 +++++++++++++++------- 3 files changed, 127 insertions(+), 44 deletions(-) (limited to 'xlators/features/changetimerecorder') diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c index 6be20819107..80deefbd4b2 100644 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ b/xlators/features/changetimerecorder/src/changetimerecorder.c @@ -50,6 +50,7 @@ ctr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, @@ -109,6 +110,7 @@ ctr_setattr (call_frame_t *frame, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, @@ -163,6 +165,7 @@ ctr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ @@ -193,6 +196,7 @@ ctr_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int ret = -1; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); ret = ctr_insert_unwind(frame, this, @@ -217,6 +221,7 @@ ctr_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ @@ -274,6 +279,7 @@ ctr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, @@ -327,7 +333,7 @@ ctr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); - + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, @@ -389,6 +395,7 @@ ctr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, gf_ctr_link_context_t *_olink_cx = &old_link_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill old link context*/ FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, @@ -488,6 +495,8 @@ ctr_unlink (call_frame_t *frame, xlator_t *this, gf_ctr_link_context_t ctr_link_cx; gf_ctr_link_context_t *_link_cx = &ctr_link_cx; + GF_ASSERT (frame); + CTR_IS_DISABLED_THEN_GOTO(this, out); /*Fill link context*/ @@ -498,6 +507,9 @@ ctr_unlink (call_frame_t *frame, xlator_t *this, loc->inode->gfid, _link_cx, NULL, GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WDEL); + /*Internal FOP*/ + _inode_cx->is_internal_fop = CTR_IS_INTERNAL_FOP(frame, xdata); + /*record into the database*/ ret = ctr_insert_wind(frame, this, _inode_cx); if (ret) { @@ -576,6 +588,7 @@ ctr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, @@ -629,6 +642,7 @@ ctr_setxattr (call_frame_t *frame, xlator_t *this, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, @@ -694,8 +708,6 @@ ctr_mknod (call_frame_t *frame, xlator_t *this, GF_ASSERT(frame); GF_ASSERT(frame->root); - CTR_IF_REBALANCE_FOP_THEN_GOTO (frame, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*get gfid from xdata dict*/ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); @@ -714,6 +726,9 @@ ctr_mknod (call_frame_t *frame, xlator_t *this, *ptr_gfid, _link_cx, NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); + /*Internal FOP*/ + _inode_cx->is_internal_fop = CTR_IS_INTERNAL_FOP(frame, xdata); + /*record into the database*/ ret = ctr_insert_wind(frame, this, _inode_cx); if (ret) { @@ -771,6 +786,9 @@ ctr_create (call_frame_t *frame, xlator_t *this, CTR_IS_DISABLED_THEN_GOTO(this, out); + GF_ASSERT(frame); + GF_ASSERT(frame->root); + /*Get GFID from Xdata dict*/ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); if (ret) { @@ -788,6 +806,9 @@ ctr_create (call_frame_t *frame, xlator_t *this, *ptr_gfid, _link_cx, NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); + /*Internal FOP*/ + _inode_cx->is_internal_fop = CTR_IS_INTERNAL_FOP(frame, xdata); + /*record into the database*/ ret = ctr_insert_wind(frame, this, &ctr_inode_cx); if (ret) { @@ -838,7 +859,8 @@ ctr_link (call_frame_t *frame, xlator_t *this, CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_REBALANCE_FOP_THEN_GOTO (frame, out); + GF_ASSERT(frame); + GF_ASSERT(frame->root); /*fill ctr link context*/ FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, @@ -849,6 +871,9 @@ ctr_link (call_frame_t *frame, xlator_t *this, oldloc->inode->gfid, _link_cx, NULL, GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); + /*Internal FOP*/ + _inode_cx->is_internal_fop = CTR_IS_INTERNAL_FOP(frame, xdata); + /*record into the database*/ ret = ctr_insert_wind(frame, this, _inode_cx); if (ret) { @@ -896,6 +921,7 @@ ctr_readv (call_frame_t *frame, xlator_t *this, gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; CTR_IS_DISABLED_THEN_GOTO(this, out); + CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); /*Fill ctr inode context*/ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c index 7341c11d540..da5a3deed96 100644 --- a/xlators/features/changetimerecorder/src/ctr-helper.c +++ b/xlators/features/changetimerecorder/src/ctr-helper.c @@ -18,18 +18,24 @@ * ******************************************************************************/ int -fill_db_record_for_unwind(gf_ctr_local_t *ctr_local, +fill_db_record_for_unwind(xlator_t *this, + gf_ctr_local_t *ctr_local, gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path) { int ret = -1; gfdb_time_t *ctr_uwtime = NULL; + gf_ctr_private_t *_priv = NULL; + + GF_ASSERT (this); + _priv = this->private; + GF_ASSERT (_priv); GF_ASSERT(ctr_local); /*If not unwind path error*/ if (!isunwindpath(fop_path)) { - gf_log (GFDB_DATA_STORE, GF_LOG_ERROR, "Wrong fop_path." + gf_log (this->name, GF_LOG_ERROR, "Wrong fop_path." "Should be unwind"); goto out; } @@ -38,15 +44,22 @@ fill_db_record_for_unwind(gf_ctr_local_t *ctr_local, CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path; CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type; - /*Time is not recorded for internal fops*/ - if (!ctr_local->is_internal_fop) { - ret = gettimeofday (ctr_uwtime, NULL); - if (ret == -1) { - gf_log (GFDB_DATA_STORE, GF_LOG_ERROR, + ret = gettimeofday (ctr_uwtime, NULL); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "Error filling unwind time record %s", strerror(errno)); goto out; } + + /* Special case i.e if its a tier rebalance + * + cold tier brick + * + its a create/mknod FOP + * we record unwind time as zero */ + if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG + && (!_priv->ctr_hot_brick) + && isdentrycreatefop(fop_type)) { + memset(ctr_uwtime, 0, sizeof(*ctr_uwtime)); } ret = 0; out: @@ -60,18 +73,23 @@ out: * ******************************************************************************/ int -fill_db_record_for_wind(gf_ctr_local_t *ctr_local, +fill_db_record_for_wind (xlator_t *this, + gf_ctr_local_t *ctr_local, gf_ctr_inode_context_t *ctr_inode_cx) { int ret = -1; - gfdb_time_t *ctr_wtime = NULL; + gfdb_time_t *ctr_wtime = NULL; + gf_ctr_private_t *_priv = NULL; - GF_ASSERT(ctr_local); - IS_CTR_INODE_CX_SANE(ctr_inode_cx); + GF_ASSERT (this); + _priv = this->private; + GF_ASSERT (_priv); + GF_ASSERT (ctr_local); + IS_CTR_INODE_CX_SANE (ctr_inode_cx); /*if not wind path error!*/ if (!iswindpath(ctr_inode_cx->fop_path)) { - gf_log (GFDB_DATA_STORE, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Wrong fop_path. Should be wind"); goto out; } @@ -80,15 +98,22 @@ fill_db_record_for_wind(gf_ctr_local_t *ctr_local, CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; - /*Time is not recorded for internal fops*/ - if (!ctr_local->is_internal_fop) { - ret = gettimeofday (ctr_wtime, NULL); - if (ret) { - gf_log (GFDB_DATA_STORE, GF_LOG_ERROR, + ret = gettimeofday (ctr_wtime, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Error filling wind time record %s", strerror(errno)); goto out; - } + } + + /* Special case i.e if its a tier rebalance + * + cold tier brick + * + its a create/mknod FOP + * we record wind time as zero */ + if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG + && (!_priv->ctr_hot_brick) + && isdentrycreatefop(ctr_inode_cx->fop_type)) { + memset(ctr_wtime, 0, sizeof(*ctr_wtime)); } /*Copy gfid into db record*/ diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h index c63b9c122d2..6f4ad5cbe7a 100644 --- a/xlators/features/changetimerecorder/src/ctr-helper.h +++ b/xlators/features/changetimerecorder/src/ctr-helper.h @@ -62,17 +62,12 @@ typedef struct gf_ctr_private { * is_internal_fop in gf_ctr_local will tell us if this is a internal fop and * take special/no action. We dont record change/acces times or increement heat * counter for internal fops from rebalancer. - * NOTE: This piece is broken with the addition of frequency counters. - * Any rebalancer or tiering will cause the files to get the files heated. - * We would require seperate identifiers for tiering FOPS. - * The QE have noted this issue and will raise a bug as this patch gets merged. - * We will fix this as a bug fix. - * * */ typedef struct gf_ctr_local { gfdb_db_record_t gfdb_db_record; ia_type_t ia_inode_type; gf_boolean_t is_internal_fop; + gf_client_pid_t client_pid; } gf_ctr_local_t; /* * Easy access of gfdb_db_record of ctr_local @@ -163,6 +158,7 @@ typedef struct gf_ctr_inode_context { gf_ctr_link_context_t *old_link_cx; gfdb_fop_type_t fop_type; gfdb_fop_path_t fop_path; + gf_boolean_t is_internal_fop; } gf_ctr_inode_context_t; @@ -249,6 +245,18 @@ do {\ #define REBALANCE_FOP(frame)\ (frame->root->pid == GF_CLIENT_PID_DEFRAG) +/* + * If its a tiering rebalancer fop + * */ +#define TIER_REBALANCE_FOP(frame)\ + (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG) + +/* + * If its a AFR SELF HEAL + * */ + #define AFR_SELF_HEAL_FOP(frame)\ + (frame->root->pid == GF_CLIENT_PID_AFR_SELF_HEALD) + /* * if a rebalancer fop goto * */ @@ -262,18 +270,22 @@ do {\ * Internal fop * * */ -#define CTR_IS_INTERNAL_FOP(frame, priv)\ - (REBALANCE_FOP(frame) && (!priv->ctr_hot_brick)) +#define CTR_IS_INTERNAL_FOP(frame, dict)\ + (AFR_SELF_HEAL_FOP (frame) \ + || REBALANCE_FOP (frame) \ + || TIER_REBALANCE_FOP (frame) \ + || (dict && \ + dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY))) /** * ignore internal fops for all clients except AFR self-heal daemon */ #define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label)\ do {\ - if ((frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) \ - && dict \ - && dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)) \ - goto label; \ + GF_ASSERT(frame);\ + GF_ASSERT(frame->root);\ + if (CTR_IS_INTERNAL_FOP(frame, dict)) \ + goto label; \ } while (0) @@ -290,14 +302,15 @@ do {\ goto label;\ } while (0) - int -fill_db_record_for_unwind(gf_ctr_local_t *ctr_local, +fill_db_record_for_unwind (xlator_t *this, + gf_ctr_local_t *ctr_local, gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path); int -fill_db_record_for_wind(gf_ctr_local_t *ctr_local, +fill_db_record_for_wind (xlator_t *this, + gf_ctr_local_t *ctr_local, gf_ctr_inode_context_t *ctr_inode_cx); /******************************************************************************* @@ -317,6 +330,7 @@ ctr_insert_wind (call_frame_t *frame, gf_ctr_local_t *ctr_local = NULL; GF_ASSERT(frame); + GF_ASSERT(frame->root); GF_ASSERT(this); IS_CTR_INODE_CX_SANE(ctr_inode_cx); @@ -335,16 +349,32 @@ ctr_insert_wind (call_frame_t *frame, goto out; }; ctr_local = frame->local; + ctr_local->client_pid = frame->root->pid; + ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop; - /*Broken please refer gf_ctr_local_t documentation*/ - ctr_local->is_internal_fop = CTR_IS_INTERNAL_FOP(frame, _priv); - - /*Broken please refer gf_ctr_local_t documentation*/ + /* Decide whether to record counters or not */ CTR_DB_REC(ctr_local).do_record_counters = - _priv->ctr_record_counter; + _priv->ctr_record_counter && + !(ctr_local->is_internal_fop); + + /* Decide whether to record times or not + * For non internal FOPS record times as usual*/ + if (!ctr_local->is_internal_fop) { + CTR_DB_REC(ctr_local).do_record_times = + (_priv->ctr_record_wind + || _priv->ctr_record_unwind); + } + /* when its a internal FOPS*/ + else { + /* Record times only for create + * i.e when the inode is created */ + CTR_DB_REC(ctr_local).do_record_times = + (isdentrycreatefop(ctr_inode_cx->fop_type)) ? + _gf_true : _gf_false; + } /*Fill the db record for insertion*/ - ret = fill_db_record_for_wind (ctr_local, ctr_inode_cx); + ret = fill_db_record_for_wind (this, ctr_local, ctr_inode_cx); if (ret) { gf_log (this->name, GF_LOG_ERROR, "WIND: Error filling ctr local"); @@ -364,6 +394,7 @@ out: if (ret) { free_ctr_local (ctr_local); + frame->local = NULL; } return ret; @@ -406,7 +437,8 @@ ctr_insert_unwind (call_frame_t *frame, CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind; - ret = fill_db_record_for_unwind(ctr_local, fop_type, fop_path); + ret = fill_db_record_for_unwind(this, ctr_local, fop_type, + fop_path); if (ret == -1) { gf_log(this->name, GF_LOG_ERROR, "UNWIND: Error" "filling ctr local"); -- cgit