From 96af474045c9ba5ab74ca76daa823d91a0a0c610 Mon Sep 17 00:00:00 2001 From: Joseph Fernandes Date: Thu, 27 Aug 2015 17:23:07 +0530 Subject: tier/ctr: Solving DB Lock issue due to write contention from db connections Problem: The DB on the brick is been accessed by CTR, for write and tier migrator, for read and write. The write from tier migrator is reseting the heat counters after a cycle. Since we are using sqlite, two connections trying to write would cause a db lock contention. As a result CTR used to fail to update the db. Solution: Using the same db connection of CTR for reseting the heat counters. 1) Introducted a new IPC FOP for CTR 2) After the query do a ipc syncop to the underlying client xlator associated to the brick. 3) CTR in brick will catch the IPC FOP and cleat the heat counters. Change-Id: I53306bfc08dcdba479deb4ccc154896521336150 BUG: 1260730 Signed-off-by: Joseph Fernandes Reviewed-on: http://review.gluster.org/12031 Tested-by: NetBSD Build System Tested-by: Gluster Build System Reviewed-by: Dan Lambright Tested-by: Dan Lambright --- xlators/cluster/dht/src/dht-mem-types.h | 1 + xlators/cluster/dht/src/tier.c | 139 ++++++++++++++------- xlators/cluster/dht/src/tier.h | 9 +- .../changetimerecorder/src/changetimerecorder.c | 34 ++++- .../features/changetimerecorder/src/ctr-helper.h | 1 + 5 files changed, 139 insertions(+), 45 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 85e5baed62c..e3a38ed7e03 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -34,6 +34,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_container_t, gf_dht_mt_octx_t, gf_dht_mt_miginfo_t, + gf_tier_mt_bricklist_t, gf_dht_mt_end }; #endif diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 7563ec7f20b..ff31c17c7e2 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -27,7 +27,7 @@ static void *libhandle; static gfdb_methods_t gfdb_methods; #define DB_QUERY_RECORD_SIZE 4096 - +#define PROMOTION_CYCLE_CNT 4 static int @@ -436,8 +436,7 @@ out: * It picks up each bricks db and queries for eligible files for migration. * The list of eligible files are populated in appropriate query files*/ static int -tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, - void *args) { +tier_process_brick_cbk (brick_list_t *local_brick, void *args) { int ret = -1; char *db_path = NULL; query_cbk_args_t *query_cbk_args = NULL; @@ -456,8 +455,12 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, GF_VALIDATE_OR_GOTO (this->name, gfdb_brick_dict_info->_query_cbk_args, out); - GF_VALIDATE_OR_GOTO (this->name, value, out); - db_path = data_to_str(value); + GF_VALIDATE_OR_GOTO (this->name, local_brick, out); + + GF_VALIDATE_OR_GOTO (this->name, local_brick->xlator, out); + + GF_VALIDATE_OR_GOTO (this->name, local_brick->brick_db_path, out); + db_path = local_brick->brick_db_path; /*Preparing DB parameters before init_db i.e getting db connection*/ params_dict = dict_new (); @@ -467,7 +470,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, "DB Params cannot initialized!"); goto out; } - SET_DB_PARAM_TO_DICT(this->name, params_dict, gfdb_methods.dbpath, + SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH, db_path, ret, out); /*Get the db connection*/ @@ -508,7 +511,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, write_freq_threshold, query_cbk_args->defrag-> read_freq_threshold, - _gf_true); + _gf_false); } } else { if (query_cbk_args->defrag->write_freq_threshold == 0 && @@ -527,7 +530,7 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, query_cbk_args->defrag-> write_freq_threshold, query_cbk_args->defrag->read_freq_threshold, - _gf_true); + _gf_false); } } if (ret) { @@ -535,7 +538,17 @@ tier_process_brick_cbk (dict_t *brick_dict, char *key, data_t *value, DHT_MSG_LOG_TIER_ERROR, "FATAL: query from db failed"); goto out; - } + } + + /*Clear the heat on the DB entries*/ + ret = syncop_ipc (local_brick->xlator, GF_IPC_TARGET_CTR, NULL, NULL); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_LOG_TIER_ERROR, "Failed clearing the heat " + "on db %s", local_brick->brick_db_path); + goto out; + } + ret = 0; out: if (query_cbk_args && query_cbk_args->queryFILE) { @@ -555,6 +568,7 @@ tier_build_migration_qfile (demotion_args_t *args, _gfdb_brick_dict_info_t gfdb_brick_dict_info; gfdb_time_t time_in_past; int ret = -1; + brick_list_t *local_brick = NULL; /* * The first time this function is called, query file will @@ -585,14 +599,18 @@ tier_build_migration_qfile (demotion_args_t *args, gfdb_brick_dict_info.time_stamp = &time_in_past; gfdb_brick_dict_info._gfdb_promote = is_promotion; gfdb_brick_dict_info._query_cbk_args = query_cbk_args; - ret = dict_foreach (args->brick_list, tier_process_brick_cbk, - &gfdb_brick_dict_info); - if (ret) { - gf_msg (args->this->name, GF_LOG_ERROR, 0, - DHT_MSG_BRICK_QUERY_FAILED, - "Brick query failed\n"); - goto out; + + list_for_each_entry (local_brick, args->brick_list, list) { + ret = tier_process_brick_cbk (local_brick, + &gfdb_brick_dict_info); + if (ret) { + gf_msg (args->this->name, GF_LOG_ERROR, 0, + DHT_MSG_BRICK_QUERY_FAILED, + "Brick query failed\n"); + goto out; + } } + ret = 0; out: return ret; } @@ -697,19 +715,19 @@ out: } static int -tier_get_bricklist (xlator_t *xl, dict_t *bricklist) +tier_get_bricklist (xlator_t *xl, struct list_head *local_bricklist_head) { xlator_list_t *child = NULL; char *rv = NULL; char *rh = NULL; char localhost[256] = {0}; - char *db_path = NULL; char *brickname = NULL; char db_name[PATH_MAX] = ""; int ret = 0; + brick_list_t *local_brick = NULL; GF_VALIDATE_OR_GOTO ("tier", xl, out); - GF_VALIDATE_OR_GOTO ("tier", bricklist, out); + GF_VALIDATE_OR_GOTO ("tier", local_bricklist_head, out); gethostname (localhost, sizeof (localhost)); @@ -724,27 +742,38 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist) if (gf_is_local_addr (rh)) { + local_brick = GF_CALLOC (1, sizeof(brick_list_t), + gf_tier_mt_bricklist_t); + if (!local_brick) { + goto out; + } + ret = dict_get_str(xl->options, "remote-subvolume", &rv); if (ret < 0) goto out; + brickname = strrchr(rv, '/') + 1; snprintf(db_name, sizeof(db_name), "%s.db", brickname); - db_path = GF_CALLOC (PATH_MAX, 1, gf_common_mt_char); - if (!db_path) { + + local_brick->brick_db_path = + GF_CALLOC (PATH_MAX, 1, gf_common_mt_char); + if (!local_brick->brick_db_path) { gf_msg ("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, "Faile. to allocate memory for bricklist"); goto out; } - sprintf(db_path, "%s/%s/%s", rv, + sprintf(local_brick->brick_db_path, "%s/%s/%s", rv, GF_HIDDEN_PATH, db_name); - if (dict_add_dynstr_with_alloc(bricklist, "brick", - db_path)) - goto out; + + local_brick->xlator = xl; + + list_add_tail (&(local_brick->list), + local_bricklist_head); ret = 0; goto out; @@ -752,19 +781,48 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist) } for (child = xl->children; child; child = child->next) { - ret = tier_get_bricklist(child->xlator, bricklist); + ret = tier_get_bricklist(child->xlator, local_bricklist_head); + if (ret) { + goto out; + } } + + ret = 0; out: - GF_FREE (db_path); + + if (ret) { + if (local_brick) { + GF_FREE (local_brick->brick_db_path); + } + GF_FREE (local_brick); + } return ret; } +void +clear_bricklist (struct list_head *brick_list) +{ + brick_list_t *local_brick = NULL; + brick_list_t *temp = NULL; + + if (list_empty(brick_list)) { + return; + } + + list_for_each_entry_safe (local_brick, temp, brick_list, list) { + list_del (&local_brick->list); + GF_FREE (local_brick->brick_db_path); + GF_FREE (local_brick); + } +} + + int tier_start (xlator_t *this, gf_defrag_info_t *defrag) { - dict_t *bricklist_cold = NULL; - dict_t *bricklist_hot = NULL; + struct list_head bricklist_hot = { 0 }; + struct list_head bricklist_cold = { 0 }; dht_conf_t *conf = NULL; gfdb_time_t current_time; int freq_promote = 0; @@ -783,16 +841,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) conf = this->private; - bricklist_cold = dict_new(); - if (!bricklist_cold) - return -1; - - bricklist_hot = dict_new(); - if (!bricklist_hot) - return -1; + INIT_LIST_HEAD ((&bricklist_hot)); + INIT_LIST_HEAD ((&bricklist_cold)); - tier_get_bricklist (conf->subvolumes[0], bricklist_cold); - tier_get_bricklist (conf->subvolumes[1], bricklist_hot); + tier_get_bricklist (conf->subvolumes[0], &bricklist_cold); + tier_get_bricklist (conf->subvolumes[1], &bricklist_hot); gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d" @@ -873,7 +926,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) if (is_demotion_triggered) { demotion_args.this = this; - demotion_args.brick_list = bricklist_hot; + demotion_args.brick_list = &bricklist_hot; demotion_args.defrag = defrag; demotion_args.freq_time = freq_demote; ret_demotion = pthread_create (&demote_thread, @@ -889,9 +942,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) if (is_promotion_triggered) { promotion_args.this = this; - promotion_args.brick_list = bricklist_cold; + promotion_args.brick_list = &bricklist_cold; promotion_args.defrag = defrag; - promotion_args.freq_time = freq_promote; + promotion_args.freq_time = freq_promote * PROMOTION_CYCLE_CNT; ret_promotion = pthread_create (&promote_thread, NULL, &tier_promote, &promotion_args); @@ -940,8 +993,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag) ret = 0; out: - dict_unref(bricklist_cold); - dict_unref(bricklist_hot); + clear_bricklist (&bricklist_cold); + clear_bricklist (&bricklist_hot); return ret; } diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h index 88fb7844dad..76ff27d5932 100644 --- a/xlators/cluster/dht/src/tier.h +++ b/xlators/cluster/dht/src/tier.h @@ -31,6 +31,7 @@ #define TIMER_SECS 3600 #include "gfdb_data_store.h" +#include "gfdb_sqlite3.h" #include #include @@ -56,10 +57,16 @@ typedef struct _gfdb_brick_dict_info { query_cbk_args_t *_query_cbk_args; } _gfdb_brick_dict_info_t; +typedef struct brick_list { + xlator_t *xlator; + char *brick_db_path; + struct list_head list; +} brick_list_t; + typedef struct _dm_thread_args { xlator_t *this; gf_defrag_info_t *defrag; - dict_t *brick_list; + struct list_head *brick_list; int freq_time; int return_value; } promotion_args_t, demotion_args_t; diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c index 5df9cb9097b..609be07fa7e 100644 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ b/xlators/features/changetimerecorder/src/changetimerecorder.c @@ -1379,6 +1379,36 @@ out: fd, size, off, flags, xdata); return 0; } + +/*******************************ctr_ipc****************************************/ + +/* IPC Call from tier migrator to clear the heat on the DB */ +int32_t +ctr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + int ret = -1; + gf_ctr_private_t *_priv = NULL; + + GF_ASSERT(this); + _priv = this->private; + GF_ASSERT (_priv); + GF_ASSERT(_priv->_db_conn); + + if (op != GF_IPC_TARGET_CTR) + goto wind; + + ret = clear_files_heat (_priv->_db_conn); + + STACK_UNWIND_STRICT (ipc, frame, ret, 0, NULL); + return 0; + + wind: + STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->ipc, op, xdata); + return 0; +} + + /******************************************************************************/ int @@ -1596,7 +1626,9 @@ struct xlator_fops fops = { .writev = ctr_writev, .setattr = ctr_setattr, /*read fops*/ - .readv = ctr_readv + .readv = ctr_readv, + /* IPC call*/ + .ipc = ctr_ipc }; struct xlator_cbks cbks = { diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h index 0c4f3e2cd8c..cff79756603 100644 --- a/xlators/features/changetimerecorder/src/ctr-helper.h +++ b/xlators/features/changetimerecorder/src/ctr-helper.h @@ -17,6 +17,7 @@ #include "iatt.h" #include "glusterfs.h" #include "xlator.h" +#include "defaults.h" #include "logging.h" #include "common-utils.h" #include -- cgit