diff options
| author | Harshavardhana <fharshav@redhat.com> | 2011-11-15 13:44:43 -0800 | 
|---|---|---|
| committer | Vijay Bellur <vijay@gluster.com> | 2011-11-23 04:16:16 -0800 | 
| commit | 896fc241850aaa021f6f8958da4e37e37679c0cd (patch) | |
| tree | 6bce29400dfcfce50f53abbcb72d56df796d3f57 /xlators/cluster/dht/src/dht-diskusage.c | |
| parent | af7d85074fc05afdee3ff48f62b0ec5c057a3e6b (diff) | |
cluster/distribute: Add support for 'min-free-inodes' on each distribute subvolume.
This change is required because an increasingly large number of small files
can cause a subvolume to run out of inodes before it runs out of available disk space.
It is therefore necessary to support algorithmic checking of free inodes as well,
just as we already do for disk space.
Change-Id: I9b87405328d443825e239ee80ab664aceb50ee68
BUG: 3799
Signed-off-by: Harshavardhana <fharshav@redhat.com>
Reviewed-on: http://review.gluster.com/730
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Amar Tumballi <amar@gluster.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-diskusage.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 432 | 
1 files changed, 237 insertions, 195 deletions
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0b8c116ca40..5453e3b107b 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -35,227 +35,269 @@  int  dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                 int op_ret, int op_errno, struct statvfs *statvfs) +		 int op_ret, int op_errno, struct statvfs *statvfs)  { -        dht_conf_t    *conf         = NULL; -        call_frame_t  *prev          = NULL; -        int            this_call_cnt = 0; -        int            i = 0; -        double         percent = 0; -        uint64_t       bytes = 0; - -        conf = this->private; -        prev = cookie; - -        if (op_ret == -1) { -                gf_log (this->name, GF_LOG_WARNING, -                        "failed to get disk info from %s", prev->this->name); -                goto out; -        } - -        if (statvfs && statvfs->f_blocks) { -                percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; -                bytes = (statvfs->f_bavail * statvfs->f_frsize); -        } - -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) -                        if (prev->this == conf->subvolumes[i]) { -                                conf->du_stats[i].avail_percent = percent; -                                conf->du_stats[i].avail_space   = bytes; -                                gf_log (this->name, GF_LOG_TRACE, -                                        "on subvolume '%s': avail_percent is: " -                                        "%.2f and avail_space is: %"PRIu64"", -                                        prev->this->name, -                                        conf->du_stats[i].avail_percent, -                                        conf->du_stats[i].avail_space); -                        } -        } -        UNLOCK (&conf->subvolume_lock); +	dht_conf_t    
*conf         = NULL; +	call_frame_t  *prev          = NULL; +	int            this_call_cnt = 0; +	int            i = 0; +	double         percent = 0; +	double         percent_inodes = 0; +	uint64_t       bytes = 0; + +	conf = this->private; +	prev = cookie; + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_WARNING, +			"failed to get disk info from %s", prev->this->name); +		goto out; +	} + +	if (statvfs && statvfs->f_blocks) { +		percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; +		bytes = (statvfs->f_bavail * statvfs->f_frsize); +	} + +	if (statvfs && statvfs->f_files) { +		percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files; +	} else { +		/* set percent inodes to 100 for dynamically allocated inode filesystems +		   this logic holds good so that, distribute has nothing to worry about +		   total inodes rather let the 'create()' to be scheduled on the hashed +		   subvol regardless of the total inodes. since we have no awareness on +		   loosing inodes this logic fits well +		*/ +		percent_inodes = 100; +	} + +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) +			if (prev->this == conf->subvolumes[i]) { +				conf->du_stats[i].avail_percent = percent; +				conf->du_stats[i].avail_space   = bytes; +				conf->du_stats[i].avail_inodes  = percent_inodes; +				gf_log (this->name, GF_LOG_DEBUG, +					"on subvolume '%s': avail_percent is: " +					"%.2f and avail_space is: %"PRIu64" " +					"and avail_inodes is: %.2f", +					prev->this->name, +					conf->du_stats[i].avail_percent, +					conf->du_stats[i].avail_space, +					conf->du_stats[i].avail_inodes); +			} +	} +	UNLOCK (&conf->subvolume_lock);  out: -        this_call_cnt = dht_frame_return (frame); -        if (is_last_call (this_call_cnt)) -                DHT_STACK_DESTROY (frame); +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_DESTROY (frame); -        return 0; +	return 0;  }  int  dht_get_du_info_for_subvol 
(xlator_t *this, int subvol_idx)  { -        dht_conf_t    *conf         = NULL; -        call_frame_t  *statfs_frame = NULL; -        dht_local_t   *statfs_local = NULL; -        call_pool_t   *pool         = NULL; - -        conf = this->private; -        pool = this->ctx->pool; - -        statfs_frame = create_frame (this, pool); -        if (!statfs_frame) { -                goto err; -        } - -        /* local->fop value is not used in this case */ -        statfs_local = dht_local_init (statfs_frame, NULL, NULL, -                                       GF_FOP_MAXVALUE); -        if (!statfs_local) { -                goto err; -        } - -        loc_t tmp_loc = { .inode = NULL, -                          .path = "/", -        }; - -        statfs_local->call_cnt = 1; -        STACK_WIND (statfs_frame, dht_du_info_cbk, -                    conf->subvolumes[subvol_idx], -                    conf->subvolumes[subvol_idx]->fops->statfs, -                    &tmp_loc); - -        return 0; +	dht_conf_t    *conf         = NULL; +	call_frame_t  *statfs_frame = NULL; +	dht_local_t   *statfs_local = NULL; +	call_pool_t   *pool         = NULL; + +	conf = this->private; +	pool = this->ctx->pool; + +	statfs_frame = create_frame (this, pool); +	if (!statfs_frame) { +		goto err; +	} + +	/* local->fop value is not used in this case */ +	statfs_local = dht_local_init (statfs_frame, NULL, NULL, +				       GF_FOP_MAXVALUE); +	if (!statfs_local) { +		goto err; +	} + +	loc_t tmp_loc = { .inode = NULL, +			  .path = "/", +	}; + +	statfs_local->call_cnt = 1; +	STACK_WIND (statfs_frame, dht_du_info_cbk, +		    conf->subvolumes[subvol_idx], +		    conf->subvolumes[subvol_idx]->fops->statfs, +		    &tmp_loc); + +	return 0;  err: -        if (statfs_frame) -                DHT_STACK_DESTROY (statfs_frame); +	if (statfs_frame) +		DHT_STACK_DESTROY (statfs_frame); -        return -1; +	return -1;  }  int  dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)  { -        
int            i = 0; -        dht_conf_t    *conf         = NULL; -        call_frame_t  *statfs_frame = NULL; -        dht_local_t   *statfs_local = NULL; -        struct timeval tv = {0,}; - -        conf  = this->private; - -        gettimeofday (&tv, NULL); -        if (tv.tv_sec > (conf->refresh_interval -                         + conf->last_stat_fetch.tv_sec)) { - -                statfs_frame = copy_frame (frame); -                if (!statfs_frame) { -                        goto err; -                } - -                /* In this case, 'local->fop' is not used */ -                statfs_local = dht_local_init (statfs_frame, loc, NULL, -                                               GF_FOP_MAXVALUE); -                if (!statfs_local) { -                        goto err; -                } - -                loc_t tmp_loc = { .inode = NULL, -                                  .path = "/", -                }; - -                statfs_local->call_cnt = conf->subvolume_cnt; -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        STACK_WIND (statfs_frame, dht_du_info_cbk, -                                    conf->subvolumes[i], -                                    conf->subvolumes[i]->fops->statfs, -                                    &tmp_loc); -                } - -                conf->last_stat_fetch.tv_sec = tv.tv_sec; -        } -        return 0; +	int            i = 0; +	dht_conf_t    *conf         = NULL; +	call_frame_t  *statfs_frame = NULL; +	dht_local_t   *statfs_local = NULL; +	struct timeval tv = {0,}; + +	conf  = this->private; + +	gettimeofday (&tv, NULL); +	if (tv.tv_sec > (conf->refresh_interval +			 + conf->last_stat_fetch.tv_sec)) { + +		statfs_frame = copy_frame (frame); +		if (!statfs_frame) { +			goto err; +		} + +		/* In this case, 'local->fop' is not used */ +		statfs_local = dht_local_init (statfs_frame, loc, NULL, +					       GF_FOP_MAXVALUE); +		if (!statfs_local) { +			goto err; +		} + +		loc_t 
tmp_loc = { .inode = NULL, +				  .path = "/", +		}; + +		statfs_local->call_cnt = conf->subvolume_cnt; +		for (i = 0; i < conf->subvolume_cnt; i++) { +			STACK_WIND (statfs_frame, dht_du_info_cbk, +				    conf->subvolumes[i], +				    conf->subvolumes[i]->fops->statfs, +				    &tmp_loc); +		} + +		conf->last_stat_fetch.tv_sec = tv.tv_sec; +	} +	return 0;  err: -        if (statfs_frame) -                DHT_STACK_DESTROY (statfs_frame); +	if (statfs_frame) +		DHT_STACK_DESTROY (statfs_frame); -        return -1; +	return -1;  } -int +gf_boolean_t  dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)  { -        int         i = 0; -        int         subvol_filled = 0; -        dht_conf_t *conf = NULL; - -        conf = this->private; - -        /* Check for values above specified percent or free disk */ -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        if (subvol == conf->subvolumes[i]) { -                                if (conf->disk_unit == 'p') { -                                        if (conf->du_stats[i].avail_percent < -                                            conf->min_free_disk) { -                                                subvol_filled = 1; -                                                break; -                                        } -                                } else { -                                        if (conf->du_stats[i].avail_space < -                                            conf->min_free_disk) { -                                                subvol_filled = 1; -                                                break; -                                        } -                                } -                        } -                } -        } -        UNLOCK (&conf->subvolume_lock); - -        if (subvol_filled && conf->subvolume_status[i]) { -                if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { 
-                        gf_log (this->name, GF_LOG_WARNING, -                                "disk space on subvolume '%s' is getting " -                                "full (%.2f %%), consider adding more nodes", -                                subvol->name, -                                (100 - conf->du_stats[i].avail_percent)); -                } -        } - -        return subvol_filled; +	int         i = 0; +	dht_conf_t *conf = NULL; +	gf_boolean_t subvol_filled_inodes = _gf_false; +	gf_boolean_t subvol_filled_space = _gf_false; +	gf_boolean_t is_subvol_filled = _gf_false; + +	conf = this->private; + +	/* Check for values above specified percent or free disk */ +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (subvol == conf->subvolumes[i]) { +				if (conf->disk_unit == 'p') { +					if (conf->du_stats[i].avail_percent < +					    conf->min_free_disk) { +						subvol_filled_space = _gf_true; +						break; +					} + +				} else { +					if (conf->du_stats[i].avail_space < +					    conf->min_free_disk) { +						subvol_filled_space = _gf_true; +						break; +					} +				} +				if (conf->du_stats[i].avail_inodes < +				    conf->min_free_inodes) { +					subvol_filled_inodes = _gf_true; +					break; +				} +			} +		} +	} +	UNLOCK (&conf->subvolume_lock); + +	if (subvol_filled_space && conf->subvolume_status[i]) { +		if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { +			gf_log (this->name, GF_LOG_WARNING, +				"disk space on subvolume '%s' is getting " +				"full (%.2f %%), consider adding more nodes", +				subvol->name, +				(100 - conf->du_stats[i].avail_percent)); +		} +	} + +	if (subvol_filled_inodes && conf->subvolume_status[i]) { +		if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { +			gf_log (this->name, GF_LOG_CRITICAL, +				"inodes on subvolume '%s' are at " +				"(%.2f %%), consider adding more nodes", +				subvol->name, +				(100 - conf->du_stats[i].avail_inodes)); +		} +	} + +	
is_subvol_filled = (subvol_filled_space || subvol_filled_inodes); + +	return is_subvol_filled;  }  xlator_t *  dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)  { -        int         i = 0; -        double      max= 0; -        xlator_t   *avail_subvol = NULL; -        dht_conf_t *conf = NULL; - -        conf = this->private; - -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        if (conf->disk_unit == 'p') { -                                if (conf->du_stats[i].avail_percent > max) { -                                        max = conf->du_stats[i].avail_percent; -                                        avail_subvol = conf->subvolumes[i]; -                                } -                        } else { -                                if (conf->du_stats[i].avail_space > max) { -                                        max = conf->du_stats[i].avail_space; -                                        avail_subvol = conf->subvolumes[i]; -                                } -                        } -                } -        } -        UNLOCK (&conf->subvolume_lock); - -        if (!avail_subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no subvolume has enough free space to create"); -        } - -        if (max < conf->min_free_disk) -                avail_subvol = subvol; - -        if (!avail_subvol) -                avail_subvol = subvol; - -        return avail_subvol; +	int         i = 0; +	double      max = 0; +	double      max_inodes = 0; +	xlator_t   *avail_subvol = NULL; +	dht_conf_t *conf = NULL; + +	conf = this->private; + +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (conf->disk_unit == 'p') { +				if ((conf->du_stats[i].avail_percent > max) +				    && (conf->du_stats[i].avail_inodes > max_inodes)) { +					max = conf->du_stats[i].avail_percent; +					max_inodes = 
conf->du_stats[i].avail_inodes; +					avail_subvol = conf->subvolumes[i]; +				} +			} else { +				if ((conf->du_stats[i].avail_space > max) +				    && (conf->du_stats[i].avail_inodes > max_inodes)) { +					max = conf->du_stats[i].avail_space; +					max_inodes = conf->du_stats[i].avail_inodes; +					avail_subvol = conf->subvolumes[i]; +				} + +			} +		} +	} +	UNLOCK (&conf->subvolume_lock); + +	if (!avail_subvol) { +		gf_log (this->name, GF_LOG_DEBUG, +			"no subvolume has enough free space and inodes to create"); +	} + +	if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes)) +		avail_subvol = subvol; + +	if (!avail_subvol) +		avail_subvol = subvol; + +	return avail_subvol;  }  | 
