diff options
| -rw-r--r-- | extras/stripe-merge.c | 496 | ||||
| -rw-r--r-- | xlators/cluster/stripe/src/stripe-helpers.c | 92 | ||||
| -rw-r--r-- | xlators/cluster/stripe/src/stripe.c | 339 | ||||
| -rw-r--r-- | xlators/cluster/stripe/src/stripe.h | 54 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 2 | 
6 files changed, 882 insertions, 102 deletions
| diff --git a/extras/stripe-merge.c b/extras/stripe-merge.c index 3f8e4b1244d..32768badd36 100644 --- a/extras/stripe-merge.c +++ b/extras/stripe-merge.c @@ -1,48 +1,498 @@ +/* +  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ + +/* + * stripe-merge.c + * + * This program recovers an original file based on the striped files stored on + * the individual bricks of a striped volume. The file format and stripe + * geometry is validated through the extended attributes stored in the file. + * + * TODO: Support optional xattr recovery (i.e., user xattrs). Perhaps provide a + * 	 command-line flag to toggle this behavior. + */ +  #include <stdio.h> -#include <unistd.h> -#include <fcntl.h>  #include <sys/types.h>  #include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <errno.h> +#include <string.h> +#include <attr/xattr.h> +#include <fnmatch.h> -int -main (int argc, char *argv[]) +#define ATTRNAME_STRIPE_INDEX "trusted.*.stripe-index" +#define ATTRNAME_STRIPE_COUNT "trusted.*.stripe-count" +#define ATTRNAME_STRIPE_SIZE "trusted.*.stripe-size" +#define ATTRNAME_STRIPE_COALESCE "trusted.*.stripe-coalesce" + +#define INVALID_FD -1 +#define INVALID_MODE UINT32_MAX + +struct file_stripe_info { +	int stripe_count; +	int stripe_size; +	int coalesce; +	mode_t mode; +	int fd[0]; +}; + +static int close_files(struct file_stripe_info *); + +static struct +file_stripe_info *alloc_file_stripe_info(int count)  { -	int fds[argc-1]; -	char buf[argc-1][4096];  	int i; -	int max_ret, ret; +	struct file_stripe_info *finfo; -	if (argc < 2) { -		printf ("Usage: %s file1 file2 ... >file\n", argv[0]); -		return 1; +	finfo = calloc(1, sizeof(struct file_stripe_info) + +		(sizeof(int) * count)); +	if (!finfo) +		return NULL; + +	for (i = 0; i < count; i++) +		finfo->fd[i] = INVALID_FD; + +	finfo->mode = INVALID_MODE; +	finfo->coalesce = INVALID_FD; + +	return finfo; +} + +/* + * Search for an attribute matching the provided pattern. Return a count for + * the total number of matching entries (including 0). Allocate a buffer for + * the first matching entry found. + */ +static int +get_stripe_attr_name(const char *path, const char *pattern, char **attrname) +{ +	char attrbuf[4096]; +	char *ptr, *match = NULL; +	int len, r, match_count = 0; + +	if (!path || !pattern || !attrname) +		return -1; + +	len = listxattr(path, attrbuf, sizeof(attrbuf)); +	if (len < 0) +		return len; + +	ptr = attrbuf; +	while (ptr) { +		r = fnmatch(pattern, ptr, 0); +		if (!r) { +			if (!match) +				match = ptr; +			match_count++; +		} else if (r != FNM_NOMATCH) { +			return -1; +		} + +		len -= strlen(ptr) + 1; +		if (len > 0) +			ptr += strlen(ptr) + 1; +		else +			ptr = NULL;  	} -	for (i=0; i<argc-1; i++) { -		fds[i] = open (argv[i+1], O_RDONLY); -		if (fds[i] == -1) { -			perror (argv[i+1]); -			return 1; +	if (match) +		*attrname = strdup(match); + +	return match_count; +} + +/* + * Get the integer representation of a named attribute. + */ +static int +get_stripe_attr_val(const char *path, const char *attr, int *val) +{ +	char attrbuf[4096]; +	int len; + +	if (!path || !attr || !val) +		return -1; + +	len = getxattr(path, attr, attrbuf, sizeof(attrbuf)); +	if (len < 0) +		return len; + +	*val = atoi(attrbuf); + +	return 0; +} + +/* + * Get an attribute name/value (assumed to be an integer) pair based on a + * specified search pattern. A buffer is allocated for the exact attr name + * returned. Optionally, skip the pattern search if a buffer is provided + * (which should contain an attribute name). + * + * Returns the attribute count or -1 on error. The value parameter is set only + * when a single attribute is found. + */ +static int +get_attr(const char *path, const char *pattern, char **buf, int *val) +{ +	int count = 1; + +	if (!buf) +		return -1; + +	if (!*buf) { +		count = get_stripe_attr_name(path, pattern, buf); +		if (count > 1) { +			/* pattern isn't good enough */ +			fprintf(stderr, "ERROR: duplicate attributes found " +				"matching pattern: %s\n", pattern); +			free(*buf); +			*buf = NULL; +			return count; +		} else if (count < 1) { +			return count;  		}  	} -	max_ret = 0; +	if (get_stripe_attr_val(path, *buf, val) < 0) +		return -1; + +	return count; +} + +/* + * validate_and_open_files() + * + * Open the provided source files and validate the extended attributes. Verify + * that the geometric attributes are consistent across all of the files and + * print a warning if any files are missing. We proceed without error in the + * latter case to support partial recovery. + */ +static struct +file_stripe_info *validate_and_open_files(char *paths[], int count) +{ +	int i, val, tmp; +	struct stat sbuf; +	char *stripe_count_attr = NULL; +	char *stripe_size_attr = NULL; +	char *stripe_index_attr = NULL; +	char *stripe_coalesce_attr = NULL; +	struct file_stripe_info *finfo = NULL; + +	for (i = 0; i < count; i++) { +		if (!paths[i]) +			goto err; + +		/* +		 * Check the stripe count first so we can allocate the info +		 * struct with the appropriate number of fds. +		 */ +		if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT, +				&stripe_count_attr, &val) != 1) { +			fprintf(stderr, "ERROR: %s: attribute: '%s'\n", +				paths[i], ATTRNAME_STRIPE_COUNT); +			goto err; +		} +		if (!finfo) { +			finfo = alloc_file_stripe_info(val); +			if (!finfo) +				goto err; + +			if (val != count) +				fprintf(stderr, "WARNING: %s: stripe-count " +					"(%d) != file count (%d). Result may " +					"be incomplete.\n", paths[i], val, +					count); + +			finfo->stripe_count = val; +		} else if (val != finfo->stripe_count) { +			fprintf(stderr, "ERROR %s: invalid stripe count: %d " +				"(expected %d)\n", paths[i], val, +				finfo->stripe_count); +			goto err; +		} + +		/* +		 * Get and validate the chunk size. +		 */ +		if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr, +				&val) != 1) { +			fprintf(stderr, "ERROR: %s: attribute: '%s'\n", +				paths[i], ATTRNAME_STRIPE_SIZE); +			goto err; +		} + +		if (!finfo->stripe_size) { +			finfo->stripe_size = val; +		} else if (val != finfo->stripe_size) { +			fprintf(stderr, "ERROR: %s: invalid stripe size: %d " +				"(expected %d)\n", paths[i], val, +				finfo->stripe_size); +			goto err; +		} + +		/* +		 * stripe-coalesce is a backward compatible attribute. If the +		 * attribute does not exist, assume a value of zero for the +		 * traditional stripe format. +		 */ +		tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE, +				&stripe_coalesce_attr, &val); +		if (!tmp) { +			val = 0; +		} else if (tmp != 1) { +			fprintf(stderr, "ERROR: %s: attribute: '%s'\n", +				paths[i], ATTRNAME_STRIPE_COALESCE); +			goto err; +		} + +		if (finfo->coalesce == INVALID_FD) { +			finfo->coalesce = val; +		} else if (val != finfo->coalesce) { +			fprintf(stderr, "ERROR: %s: invalid coalesce flag\n", +				paths[i]); +			goto err; +		} + +		/* +		 * Get/validate the stripe index and open the file in the +		 * appropriate fd slot. +		 */ +		if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX, +				&stripe_index_attr, &val) != 1) { +			fprintf(stderr, "ERROR: %s: attribute: '%s'\n", +				paths[i], ATTRNAME_STRIPE_INDEX); +			goto err; +		} +		if (finfo->fd[val] != INVALID_FD) { +			fprintf(stderr, "ERROR: %s: duplicate stripe index: " +				"%d\n", paths[i], val); +			goto err; +		} + +		finfo->fd[val] = open(paths[i], O_RDONLY); +		if (finfo->fd[val] < 0) +			goto err; + +		/* +		 * Get the creation mode for the file. +		 */ +		if (fstat(finfo->fd[val], &sbuf) < 0) +			goto err; +		if (finfo->mode == INVALID_MODE) { +			finfo->mode = sbuf.st_mode; +		} else if (sbuf.st_mode != finfo->mode) { +			fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]); +			goto err; +		} +	} + +	free(stripe_count_attr); +	free(stripe_size_attr); +	free(stripe_index_attr); +	free(stripe_coalesce_attr); + +	return finfo; +err: + +	if (stripe_count_attr) +		free(stripe_count_attr); +	if (stripe_size_attr) +		free(stripe_size_attr); +	if (stripe_index_attr) +		free(stripe_index_attr); +	if (stripe_coalesce_attr) +		free(stripe_coalesce_attr); + +	if (finfo) { +		close_files(finfo); +		free(finfo); +	} + +	return NULL; +} + +static int +close_files(struct file_stripe_info *finfo) +{ +	int i, ret; + +	if (!finfo) +		return -1; + +	for (i = 0; i < finfo->stripe_count; i++) { +		if (finfo->fd[i] == INVALID_FD) +			continue; + +		ret = close(finfo->fd[i]); +		if (ret < 0) +			return ret; +	} + +	return ret; +} + +/* + * Generate the original file using files striped in the coalesced format. + * Data in the striped files is stored at a coalesced offset based on the + * stripe number. + * + * Walk through the finfo fds (which are already ordered) and and iteratively + * copy stripe_size bytes from the source files to the target file. If a source + * file is missing, seek past the associated stripe_size bytes in the target + * file. + */ +static int +generate_file_coalesce(int target, struct file_stripe_info *finfo) +{ +	char *buf; +	int ret = 0; +	int r, w, i; + +	buf = malloc(finfo->stripe_size); +	if (!buf) +		return -1; + +	i = 0; +	while (1) { +		if (finfo->fd[i] == INVALID_FD) { +			if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0) +				break; + +			i = (i + 1) % finfo->stripe_count; +			continue; +		} + +		r = read(finfo->fd[i], buf, finfo->stripe_size); +		if (r < 0) { +			ret = r; +			break; +		} +		if (!r) +			break; + +		w = write(target, buf, r); +		if (w < 0) { +			ret = w; +			break; +		} + +		i = (i + 1) % finfo->stripe_count; +	} + +	free(buf); +	return ret; +} + +/* + * Generate the original file using files striped with the traditional stripe + * format. Data in the striped files is stored at the equivalent offset from + * the source file. + */ +static int +generate_file_traditional(int target, struct file_stripe_info *finfo) +{ +	int i, j, max_ret, ret; +	char buf[finfo->stripe_count][4096]; +  	do {  		char newbuf[4096] = {0, }; -		int j;  		max_ret = 0; -		for (i=0; i<argc-1; i++) { -			memset (buf[i], 0, 4096); -			ret = read (fds[i], buf[i], 4096);  +		for (i = 0; i < finfo->stripe_count; i++) { +			memset(buf[i], 0, 4096); +			ret = read(finfo->fd[i], buf[i], 4096);  			if (ret > max_ret)  				max_ret = ret;  		} -		for (i=0; i<max_ret;i++) -			for (j=0; j<argc-1; j++) +		for (i = 0; i < max_ret; i++) +			for (j = 0; j < finfo->stripe_count; j++)  				newbuf[i] |= buf[j][i]; -		write (1, newbuf, max_ret); +			write(target, newbuf, max_ret);  	} while (max_ret);  	return 0;  } +static int +generate_file(int target, struct file_stripe_info *finfo) +{ +	if (finfo->coalesce) +		return generate_file_coalesce(target, finfo); + +	return generate_file_traditional(target, finfo); +} + +static void +usage(char *name) +{ +	fprintf(stderr, "Usage: %s [-o <outputfile>] <inputfile1> " +		"<inputfile2> ...\n", name); +} + +int +main(int argc, char *argv[]) +{ +	int file_count, opt; +	char *opath = NULL; +	int targetfd; +	struct file_stripe_info *finfo; + +	while ((opt = getopt(argc, argv, "o:")) != -1) { +		switch (opt) { +		case 'o': +			opath = optarg; +			break; +		default: +			usage(argv[0]); +			return -1; +		} +	} + +	file_count = argc - optind; + +	if (!opath || !file_count) { +		usage(argv[0]); +		return -1; +	} + +	finfo = validate_and_open_files(&argv[optind], file_count); +	if (!finfo) +		goto err; + +	targetfd = open(opath, O_RDWR|O_CREAT, finfo->mode); +	if (targetfd < 0) +		goto err; + +	if (generate_file(targetfd, finfo) < 0) +		goto err; + +	if (fsync(targetfd) < 0) +		fprintf(stderr, "ERROR: %s\n", strerror(errno)); +	if (close(targetfd) < 0) +		fprintf(stderr, "ERROR: %s\n", strerror(errno)); + +	close_files(finfo); +	free(finfo); + +	return 0; + +err: +	if (finfo) { +		close_files(finfo); +		free(finfo); +	} + +	return -1; +} + diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c index 1821832c20e..336da793e55 100644 --- a/xlators/cluster/stripe/src/stripe-helpers.c +++ b/xlators/cluster/stripe/src/stripe-helpers.c @@ -236,8 +236,6 @@ out:          return block_size;  } - -  int32_t  stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,                     dict_t *dict) @@ -246,7 +244,6 @@ stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,          data_t         *data            = NULL;          int32_t         index           = 0;          stripe_private_t *priv          = NULL; -        int32_t         ret             = -1;          priv = this->private; @@ -343,14 +340,31 @@ stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,                  if (!local->fctx->xl_array[index])                          local->fctx->xl_array[index] = prev->this;          } -        ret = 0; + +	sprintf(key, "trusted.%s.stripe-coalesce", this->name); +	data = dict_get(dict, key); +	if (!data) { +		/* +		 * The file was probably created prior to coalesce support. +		 * Assume non-coalesce mode for this file to maintain backwards +		 * compatibility. +		 */ +		gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce " +			"attr, assume non-coalesce mode"); +		local->fctx->stripe_coalesce = 0; +	} else { +		local->fctx->stripe_coalesce = data_to_int32(data); +	} + +  out: -        return ret; +        return 0;  }  int32_t  stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size, -                            uint32_t stripe_count, uint32_t stripe_index) +                            uint32_t stripe_count, uint32_t stripe_index, +			    uint32_t stripe_coalesce)  {          char            key[256]       = {0,};          int32_t         ret             = -1; @@ -378,6 +392,14 @@ stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,                          "failed to set %s in xattr_req dict", key);                  goto out;          } + +	sprintf(key, "trusted.%s.stripe-coalesce", this->name); +	ret = dict_set_int32(dict, key, stripe_coalesce); +	if (ret) { +		gf_log(this->name, GF_LOG_WARNING, +			"failed to set %s in xattr_req_dict", key); +		goto out; +	}  out:          return ret;  } @@ -508,3 +530,61 @@ stripe_iatt_merge (struct iatt *from, struct iatt *to)                  to->ia_atime = from->ia_atime;          return 0;  } + +off_t +coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count) +{ +	size_t line_size = 0; +	uint64_t stripe_num = 0; +	off_t coalesced_offset = 0; + +	line_size = stripe_size * stripe_count; +	stripe_num = offset / line_size; + +	coalesced_offset = (stripe_num * stripe_size) + +		(offset % stripe_size); + +	return coalesced_offset; +} + +off_t +uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count, +		int stripe_index) +{ +        uint64_t nr_full_stripe_chunks = 0, mod = 0; + +        if (!size) +                return size; + +	/* +	 * Estimate the number of fully written stripes from the +	 * local file size. Each stripe_size chunk corresponds to +	 * a stripe. +	 */ +        nr_full_stripe_chunks = (size / stripe_size) * stripe_count; +        mod = size % stripe_size; + +        if (!mod) { +                /* +		 * There is no remainder, thus we could have overestimated +		 * the size of the file in terms of chunks. Trim the number +		 * of chunks by the following stripe members and leave it +		 * up to those nodes to respond with a larger size (if +		 * necessary). +		 */ +                nr_full_stripe_chunks -= stripe_count - +                        (stripe_index + 1); +                size = nr_full_stripe_chunks * stripe_size; +        } else { +		/* +		 * There is a remainder and thus we own the last chunk of the +		 * file. Add the preceding stripe members of the final stripe +		 * along with the remainder to calculate the exact size. +		 */ +                nr_full_stripe_chunks += stripe_index; +                size = nr_full_stripe_chunks * stripe_size + mod; +        } + +        return size; +} + diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index a98e14e9508..efee9444e9e 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -32,7 +32,6 @@  struct volume_options options[]; -  int32_t  stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                       int32_t op_ret, int32_t op_errno, @@ -237,6 +236,8 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->stbuf_blocks      += buf->ia_blocks;                          local->postparent_blocks += postparent->ia_blocks; +			correct_file_size(buf, local->fctx, prev); +                          if (local->stbuf_size < buf->ia_size)                                  local->stbuf_size = buf->ia_size;                          if (local->postparent_size < postparent->ia_size) @@ -326,9 +327,19 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,          /* get stripe-size xattr on lookup. This would be required for           * open/read/write/pathinfo calls. Hence we send down the request           * even when type == IA_INVAL */ + +	/* +	 * We aren't guaranteed to have xdata here. We need the format info for +	 * the file, so allocate xdata if necessary. +	 */ +	if (!xdata) +		xdata = dict_new(); +	else +		xdata = dict_ref(xdata); +          if (xdata && (IA_ISREG (loc->inode->ia_type) ||              (loc->inode->ia_type == IA_INVAL))) { -                ret = stripe_xattr_request_build (this, xdata, 8, 4, 4); +                ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);                  if (ret)                          gf_log (this->name , GF_LOG_ERROR, "Failed to build"                                  " xattr request for %s", loc->path); @@ -344,6 +355,8 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,                  trav = trav->next;          } +	dict_unref(xdata); +          return 0;  err:          STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); @@ -388,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          }                          local->stbuf_blocks += buf->ia_blocks; + +			correct_file_size(buf, local->fctx, prev); +                          if (local->stbuf_size < buf->ia_size)                                  local->stbuf_size = buf->ia_size;                  } @@ -416,6 +432,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)          xlator_list_t    *trav = NULL;          stripe_local_t   *local = NULL;          stripe_private_t *priv = NULL; +	stripe_fd_ctx_t  *fctx = NULL;          int32_t           op_errno = EINVAL;          VALIDATE_OR_GOTO (frame, err); @@ -442,6 +459,13 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)          frame->local = local;          local->call_count = priv->child_count; +	if (IA_ISREG(loc->inode->ia_type)) { +		inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); +		if (!fctx) +			goto err; +		local->fctx = fctx; +	} +          while (trav) {                  STACK_WIND (frame, stripe_stat_cbk, trav->xlator,                              trav->xlator->fops->stat, loc, NULL); @@ -583,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->prebuf_blocks  += prebuf->ia_blocks;                          local->postbuf_blocks += postbuf->ia_blocks; +			correct_file_size(prebuf, local->fctx, prev); +			correct_file_size(postbuf, local->fctx, prev); +                          if (local->prebuf_size < prebuf->ia_size)                                  local->prebuf_size = prebuf->ia_size; @@ -614,10 +641,12 @@ out:  int32_t  stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)  { -        xlator_list_t    *trav = NULL;          stripe_local_t   *local = NULL;          stripe_private_t *priv = NULL; +	stripe_fd_ctx_t  *fctx = NULL;          int32_t           op_errno = EINVAL; +	int		  i, eof_idx; +	off_t		  dest_offset, tmp_offset;          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -626,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,          VALIDATE_OR_GOTO (loc->inode, err);          priv = this->private; -        trav = this->children;          if (priv->first_child_down) {                  op_errno = ENOTCONN; @@ -643,11 +671,51 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,          frame->local = local;          local->call_count = priv->child_count; -        while (trav) { -                STACK_WIND (frame, stripe_truncate_cbk, trav->xlator, -                            trav->xlator->fops->truncate, loc, offset, NULL); -                trav = trav->next; -        } +	inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); +	if (!fctx) { +		gf_log(this->name, GF_LOG_ERROR, "no stripe context"); +		op_errno = EINVAL; +		goto err; +	} + +	local->fctx = fctx; +	eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + +	for (i = 0; i < fctx->stripe_count; i++) { +		if (!fctx->xl_array[i]) { +			gf_log(this->name, GF_LOG_ERROR, +				"no xlator at index %d", i); +			op_errno = EINVAL; +			goto err; +		} + +		if (fctx->stripe_coalesce) { +			/* +	 		 * The node that owns EOF is truncated to the exact +	 		 * coalesced offset. Nodes prior to this index should +	 		 * be rounded up to the size of the complete stripe, +	 		 * while nodes after this index should be rounded down +			 * to the size of the previous stripe. +			 */ +			if (i < eof_idx) +				tmp_offset = roof(offset, fctx->stripe_size * +						fctx->stripe_count); +			else if (i > eof_idx) +				tmp_offset = floor(offset, fctx->stripe_size * +						fctx->stripe_count); +			else +				tmp_offset = offset; + +			dest_offset = coalesced_offset(tmp_offset, +					fctx->stripe_size, fctx->stripe_count); +		} else { +			dest_offset = offset; +		} + +		STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], +			fctx->xl_array[i]->fops->truncate, loc, dest_offset, +			NULL); +	}          return 0;  err: @@ -698,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->prebuf_blocks  += preop->ia_blocks;                          local->postbuf_blocks += postop->ia_blocks; +			correct_file_size(preop, local->fctx, prev); +			correct_file_size(postop, local->fctx, prev); +                          if (local->prebuf_size < preop->ia_size)                                  local->prebuf_size = preop->ia_size;                          if (local->postbuf_size < postop->ia_size) @@ -733,6 +804,7 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,          xlator_list_t    *trav = NULL;          stripe_local_t   *local = NULL;          stripe_private_t *priv = NULL; +	stripe_fd_ctx_t	 *fctx = NULL;          int32_t           op_errno = EINVAL;          VALIDATE_OR_GOTO (frame, err); @@ -766,6 +838,13 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,                  return 0;          } +	if (IA_ISREG(loc->inode->ia_type)) { +		inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); +		if (!fctx) +			goto err; +		local->fctx = fctx; +	} +          local->call_count = priv->child_count;          while (trav) {                  STACK_WIND (frame, stripe_setattr_cbk, @@ -862,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->pre_buf.ia_blocks    += prenewparent->ia_blocks;                          local->post_buf.ia_blocks   += postnewparent->ia_blocks; +			correct_file_size(buf, local->fctx, prev); +                          if (local->stbuf.ia_size < buf->ia_size)                                  local->stbuf.ia_size =  buf->ia_size; @@ -947,6 +1028,7 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,          stripe_private_t *priv = NULL;          stripe_local_t   *local = NULL;          xlator_list_t    *trav = NULL; +	stripe_fd_ctx_t	 *fctx = NULL;          int32_t           op_errno = EINVAL;          VALIDATE_OR_GOTO (frame, err); @@ -977,6 +1059,11 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,          local->call_count = priv->child_count; +	inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx); +	if (!fctx) +		goto err; +	local->fctx = fctx; +          frame->local = local;          STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator, @@ -1367,7 +1454,6 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          stripe_private_t *priv = NULL;          call_frame_t     *prev = NULL;          xlator_list_t    *trav = NULL; -        stripe_fd_ctx_t  *fctx = NULL;          if (!this || !frame || !frame->local || !cookie) {                  gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1399,10 +1485,16 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          if (uuid_is_null (local->ia_gfid))                                  uuid_copy (local->ia_gfid, buf->ia_gfid); +			if (stripe_ctx_handle(this, prev, local, xdata)) +				gf_log(this->name, GF_LOG_ERROR, +					"Error getting fctx info from dict"); +                          local->stbuf_blocks += buf->ia_blocks;                          local->preparent_blocks  += preparent->ia_blocks;                          local->postparent_blocks += postparent->ia_blocks; +			correct_file_size(buf, local->fctx, prev); +                          if (local->stbuf_size < buf->ia_size)                                  local->stbuf_size = buf->ia_size;                          if (local->preparent_size < preparent->ia_size) @@ -1441,23 +1533,10 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->postparent.ia_size   = local->postparent_size;                          local->stbuf.ia_size        = local->stbuf_size;                          local->stbuf.ia_blocks      = local->stbuf_blocks; -                        fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), -                                          gf_stripe_mt_stripe_fd_ctx_t); -                        if (!fctx) { -                                local->op_ret = -1; -                                local->op_errno = ENOMEM; -                                goto unwind; -                        } - -                        fctx->stripe_size  = local->stripe_size; -                        fctx->stripe_count = priv->child_count; -                        fctx->static_array = 1; -                        fctx->xl_array = priv->xl_array;                          inode_ctx_put (local->inode, this, -                                       (uint64_t)(long)fctx); +                                       (uint64_t)(long) local->fctx);                  } -unwind:                  STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,                                       local->inode, &local->stbuf,                                       &local->preparent, &local->postparent, NULL); @@ -1531,7 +1610,8 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          ret = stripe_xattr_request_build (this, dict,                                                            local->stripe_size, -                                                          priv->child_count, i); +                                                          priv->child_count, i, +							  priv->coalesce);                          if (ret)                                  gf_log (this->name, GF_LOG_ERROR,                                          "Failed to build xattr request"); @@ -1579,9 +1659,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,          stripe_local_t   *local          = NULL;          int32_t           op_errno       = EINVAL;          int32_t           i              = 0; -        char              size_key[256]  = {0,}; -        char              index_key[256] = {0,}; -        char              count_key[256] = {0,};          dict_t           *dict           = NULL;          int               ret            = 0;          int               need_unref     = 0; @@ -1631,15 +1708,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,                     be looked up */                  local->call_count = priv->child_count; -                /* Send a setxattr request to nodes where the -                   files are created */ -                sprintf (size_key, -                         "trusted.%s.stripe-size", this->name); -                sprintf (count_key, -                         "trusted.%s.stripe-count", this->name); -                sprintf (index_key, -                         "trusted.%s.stripe-index", this->name); -                  if (priv->xattr_supported) {                          dict = dict_new ();                          if (!dict) { @@ -1653,7 +1721,7 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,                          ret = stripe_xattr_request_build (this, dict,                                                            local->stripe_size,                                                            priv->child_count, -                                                          i); +                                                          i, priv->coalesce);                          if (ret)                                  gf_log (this->name, GF_LOG_ERROR,                                          "failed to build xattr request"); @@ -1867,6 +1935,7 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          int32_t         callcnt = 0;          stripe_local_t  *local   = NULL;          call_frame_t    *prev = NULL; +	stripe_fd_ctx_t *fctx = NULL;          if (!this || !frame || !frame->local || !cookie) {                  gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1880,6 +1949,14 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          {                  callcnt = --local->call_count; +		inode_ctx_get(inode, this, (uint64_t *) &fctx); +		if (!fctx) { +			gf_log(this->name, GF_LOG_ERROR, "failed to get stripe " +				"context"); +			op_ret = -1; +			op_errno = EINVAL; +		} +                  if (op_ret == -1) {                          gf_log (this->name, GF_LOG_DEBUG,                                  "%s returned error %s", @@ -1903,6 +1980,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->preparent_blocks  += preparent->ia_blocks;                          local->postparent_blocks += postparent->ia_blocks; +			correct_file_size(buf, fctx, prev); +                          if (local->stbuf_size < buf->ia_size)                                  local->stbuf_size = buf->ia_size;                          if (local->preparent_size < preparent->ia_size) @@ -2023,7 +2102,6 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          int32_t           callcnt = 0;          stripe_local_t   *local = NULL;          stripe_private_t *priv = NULL; -        stripe_fd_ctx_t  *fctx = NULL;          call_frame_t     *prev = NULL;          xlator_list_t    *trav = NULL; @@ -2049,12 +2127,21 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  }                  if (op_ret >= 0) { +			if (IA_ISREG(buf->ia_type)) { +				if (stripe_ctx_handle(this, prev, local, xdata)) +					gf_log(this->name, GF_LOG_ERROR, +						"Error getting fctx info from " +						"dict"); +			} +                          local->op_ret = op_ret;                          local->stbuf_blocks += buf->ia_blocks;                          local->preparent_blocks  += preparent->ia_blocks;                          local->postparent_blocks += postparent->ia_blocks; +			correct_file_size(buf, local->fctx, prev); +                          if (local->stbuf_size < buf->ia_size)                                  local->stbuf_size = buf->ia_size;                          if (local->preparent_size < preparent->ia_size) @@ -2092,23 +2179,13 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->stbuf.ia_size        = local->stbuf_size;                          local->stbuf.ia_blocks      = local->stbuf_blocks; -                        fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), -                                          gf_stripe_mt_stripe_fd_ctx_t); -                        if (!fctx) { -                                local->op_ret = -1; -                                local->op_errno = ENOMEM; -                                goto unwind; -                        } - -                        fctx->stripe_size  = local->stripe_size; -                        fctx->stripe_count = priv->child_count; -                        fctx->static_array = 1; -                        fctx->xl_array = priv->xl_array; -                        inode_ctx_put (local->inode, this, -                                    (uint64_t)(long)fctx); +			stripe_copy_xl_array(local->fctx->xl_array, +				             priv->xl_array, +					     local->fctx->stripe_count); +			inode_ctx_put(local->inode, this, +					(uint64_t) local->fctx);                  } -        unwind:                  /* Create itself has failed.. so return                     without setxattring */                  STRIPE_STACK_UNWIND (create, frame, local->op_ret, @@ -2214,14 +2291,14 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          ret = stripe_xattr_request_build (this, dict,                                                            local->stripe_size,                                                            priv->child_count, -                                                          i); +                                                          i, priv->coalesce);                          if (ret)                                  gf_log (this->name, GF_LOG_ERROR,                                          "failed to build xattr request");                  } else {                          dict = local->xattr;                  } -  +                  STACK_WIND (frame, stripe_create_cbk, trav->xlator,                              trav->xlator->fops->create, &local->loc,                              local->flags, local->mode, local->umask, local->fd, @@ -2310,7 +2387,7 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,                  ret = stripe_xattr_request_build (this, dict,                                                    local->stripe_size,                                                    priv->child_count, -                                                  i); +                                                  i, priv->coalesce);                  if (ret)                          gf_log (this->name, GF_LOG_ERROR,                                  "failed to build xattr request"); @@ -2743,6 +2820,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->prebuf_blocks  += prebuf->ia_blocks;                          local->postbuf_blocks += postbuf->ia_blocks; +			correct_file_size(prebuf, local->fctx, prev); +			correct_file_size(postbuf, local->fctx, prev); +                          if (local->prebuf_size < prebuf->ia_size)                                  local->prebuf_size = prebuf->ia_size; @@ -2777,6 +2857,7 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict          stripe_local_t   *local = NULL;          stripe_private_t *priv = NULL;          xlator_list_t    *trav = NULL; +	stripe_fd_ctx_t  *fctx = NULL;          int32_t           op_errno = 1;          VALIDATE_OR_GOTO (frame, err); @@ -2793,6 +2874,14 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict                  op_errno = ENOMEM;                  goto err;          } + +	inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); +	if (!fctx) { +		op_errno = EINVAL; +		goto err; +	} +	local->fctx = fctx; +          local->op_ret = -1;          frame->local = local;          local->call_count = priv->child_count; @@ -2846,6 +2935,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  local->stbuf = *buf;                          local->stbuf_blocks += buf->ia_blocks; + +			correct_file_size(buf, local->fctx, prev); +                          if (local->stbuf_size < buf->ia_size)                                  local->stbuf_size = buf->ia_size;                  } @@ -2877,6 +2969,7 @@ stripe_fstat (call_frame_t *frame,          stripe_local_t   *local = NULL;          stripe_private_t *priv = NULL;          xlator_list_t    *trav = NULL; +	stripe_fd_ctx_t  *fctx = NULL;          int32_t           op_errno = 1;          VALIDATE_OR_GOTO (frame, err); @@ -2897,6 +2990,13 @@ stripe_fstat (call_frame_t *frame,          frame->local = local;          local->call_count = priv->child_count; +	if (IA_ISREG(fd->inode->ia_type)) { +		inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); +		if (!fctx) +			goto err; +		local->fctx = fctx; +	} +          while (trav) {                  STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,                              trav->xlator->fops->fstat, fd, NULL); @@ -2915,8 +3015,10 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d  {          stripe_local_t   *local = NULL;          stripe_private_t *priv = NULL; -        xlator_list_t    *trav = NULL; -        int32_t           op_errno = 1; +	stripe_fd_ctx_t  *fctx = NULL; +	int		  i, eof_idx; +	off_t		  dest_offset, tmp_offset; +        int32_t		  op_errno = 1;          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -2924,7 +3026,6 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d          VALIDATE_OR_GOTO (fd->inode, err);          priv = this->private; -        trav = this->children;          /* Initialization */          local = mem_get0 (this->local_pool); @@ -2936,11 +3037,49 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d          frame->local = local;          local->call_count = priv->child_count; -        while (trav) { -                STACK_WIND (frame, stripe_truncate_cbk, trav->xlator, -                            trav->xlator->fops->ftruncate, fd, offset, NULL); -                trav = trav->next; -        } +	inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); +	if (!fctx) { +		gf_log(this->name, GF_LOG_ERROR, "no stripe context"); +		op_errno = EINVAL; +		goto err; +	} +	if (!fctx->stripe_count) { +		gf_log(this->name, GF_LOG_ERROR, "no stripe count"); +		op_errno = EINVAL; +		goto err; +	} + +	local->fctx = fctx; +	eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + +	for (i = 0; i < fctx->stripe_count; i++) { +		if (!fctx->xl_array[i]) { +			gf_log(this->name, GF_LOG_ERROR, "no xlator at index " +				"%d", i); +			op_errno = EINVAL; +			goto err; +		} + +		if (fctx->stripe_coalesce) { +			if (i < eof_idx) +				tmp_offset = roof(offset, fctx->stripe_size * +						fctx->stripe_count); +			else if (i > eof_idx) +				tmp_offset = floor(offset, fctx->stripe_size * +						fctx->stripe_count); +			else +				tmp_offset = offset; + +			dest_offset = coalesced_offset(tmp_offset, +				fctx->stripe_size, fctx->stripe_count); +		} else { +			dest_offset = offset; +		} + +		STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], +			fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, +			NULL); +	}          return 0;  err: @@ -3045,6 +3184,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          struct iatt     tmp_stbuf = {0,};          struct iobref  *tmp_iobref = NULL;          struct iobuf   *iobuf = NULL; +	call_frame_t   *prev = NULL;          if (!this || !frame || !frame->local) {                  gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -3052,13 +3192,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          }          local = frame->local; +	prev = cookie;          LOCK (&frame->lock);          {                  callcnt = --local->call_count; -                if (op_ret != -1) +                if (op_ret != -1) { +			correct_file_size(buf, local->fctx, prev);                          if (local->stbuf_size < buf->ia_size)                                  local->stbuf_size = buf->ia_size; +		}          }          UNLOCK (&frame->lock); @@ -3150,6 +3293,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          struct iatt    *tmp_stbuf_p = NULL; //need it for a warning          struct iobref  *tmp_iobref = NULL;          stripe_fd_ctx_t  *fctx = NULL; +	call_frame_t	*prev = NULL;          if (!this || !frame || !frame->local || !cookie) {                  gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -3158,6 +3302,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local  = frame->local;          index  = local->node_index; +	prev = cookie;          mframe = local->orig_frame;          if (!mframe)                  goto out; @@ -3177,6 +3322,9 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          mlocal->replies[index].stbuf  = *stbuf;                          mlocal->replies[index].count  = count;                          mlocal->replies[index].vector = iov_dup (vector, count); + +			correct_file_size(stbuf, fctx, prev); +                          if (local->stbuf_size < stbuf->ia_size)                                  local->stbuf_size = stbuf->ia_size;                          local->stbuf_blocks += stbuf->ia_blocks; @@ -3289,6 +3437,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,          uint64_t          stripe_size = 0;          off_t             rounded_start = 0;          off_t             frame_offset = offset; +	off_t		  dest_offset = 0;          stripe_local_t   *local = NULL;          call_frame_t     *rframe = NULL;          stripe_local_t   *rlocal = NULL; @@ -3361,9 +3510,16 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,                  rlocal->readv_size = frame_size;                  rframe->local = rlocal;                  idx = (index % fctx->stripe_count); + +		if (fctx->stripe_coalesce) +			dest_offset = coalesced_offset(frame_offset, +				stripe_size, fctx->stripe_count); +		else +			dest_offset = frame_offset; +                  STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],                              fctx->xl_array[idx]->fops->readv, -                            fd, frame_size, frame_offset, flags, xdata); +                            fd, frame_size, dest_offset, flags, xdata);                  frame_offset += frame_size;          } @@ -3410,11 +3566,27 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          local->op_ret += op_ret;                          local->post_buf = *postbuf;                          local->pre_buf = *prebuf; + +			local->prebuf_blocks  += prebuf->ia_blocks; +			local->postbuf_blocks += postbuf->ia_blocks; + +			correct_file_size(prebuf, local->fctx, prev); +			correct_file_size(postbuf, local->fctx, prev); + +			if (local->prebuf_size < prebuf->ia_size) +				local->prebuf_size = prebuf->ia_size; +			if (local->postbuf_size < postbuf->ia_size) +				local->postbuf_size = postbuf->ia_size;                  }          }          UNLOCK (&frame->lock);          if ((callcnt == local->wind_count) && local->unwind) { +		local->pre_buf.ia_size = local->prebuf_size; +		local->pre_buf.ia_blocks = local->prebuf_blocks; +		local->post_buf.ia_size = local->postbuf_size; +		local->post_buf.ia_blocks = local->postbuf_blocks; +                  STRIPE_STACK_UNWIND (writev, frame, local->op_ret,                                       local->op_errno, &local->pre_buf,                                       &local->post_buf, NULL); @@ -3440,6 +3612,7 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,          off_t             fill_size = 0;          uint64_t          stripe_size = 0;          uint64_t          tmp_fctx = 0; +	off_t		  dest_offset = 0;          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -3469,6 +3642,7 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,          }          frame->local = local;          local->stripe_size = stripe_size; +	local->fctx = fctx;          if (!stripe_size) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -3505,9 +3679,15 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,                  if (remaining_size == 0)                          local->unwind = 1; +		if (fctx->stripe_coalesce) +			dest_offset = coalesced_offset(offset + offset_offset, +				local->stripe_size, fctx->stripe_count); +		else +			dest_offset = offset + offset_offset; +                  STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx],                              fctx->xl_array[idx]->fops->writev, fd, tmp_vec, -                            tmp_count, offset + offset_offset, flags, iobref, +                            tmp_count, dest_offset, flags, iobref,                              xdata);                  GF_FREE (tmp_vec); @@ -3859,10 +4039,15 @@ stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,                          local->op_ret = op_ret;                          goto unlock;                  } + +		if (stripe_ctx_handle(this, prev, local, xattr)) +			gf_log(this->name, GF_LOG_ERROR, +				"Error getting fctx info from dict."); + +		correct_file_size(stbuf, local->fctx, prev); +                  stripe_iatt_merge (stbuf, &entry->d_stat);                  local->stbuf_blocks += stbuf->ia_blocks; - -                stripe_ctx_handle (this, prev, local, xattr);          }  unlock:          UNLOCK(&frame->lock); @@ -3957,7 +4142,7 @@ unlock:          xattrs = dict_new ();          if (xattrs) -                (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0); +                (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);          count = op_ret;          list_for_each_entry_safe (local_entry, tmp_entry,                                    (&local->entries.list), list) { @@ -4165,6 +4350,9 @@ reconfigure (xlator_t *this, dict_t *options)                                  goto unlock;                          }                  } + +		GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, +				unlock);          }   unlock:          UNLOCK (&priv->lock); @@ -4285,6 +4473,8 @@ init (xlator_t *this)          /* notify related */          priv->nodes_down = priv->child_count; +	GF_OPTION_INIT("coalesce", priv->coalesce, bool, out); +          this->local_pool = mem_pool_new (stripe_local_t, 128);          if (!this->local_pool) {                  ret = -1; @@ -4768,5 +4958,12 @@ struct volume_options options[] = {            .type = GF_OPTION_TYPE_BOOL,            .default_value = "true"          }, +	{ .key = {"coalesce"}, +	  .type = GF_OPTION_TYPE_BOOL, +	  .default_value = "false", +	  .description = "Enable coalesce mode to flatten striped files as " +			 "stored on the server (i.e., eliminate holes caused " +			 "by the traditional format)." +	},          { .key  = {NULL} },  }; diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h index cb05eb56fc0..1b9e660c126 100644 --- a/xlators/cluster/stripe/src/stripe.h +++ b/xlators/cluster/stripe/src/stripe.h @@ -101,6 +101,7 @@ struct stripe_private {          int8_t                  child_count;          int8_t                 *state; /* Current state of child node */          gf_boolean_t            xattr_supported;  /* default yes */ +	gf_boolean_t		coalesce;          char                    vol_uuid[UUID_SIZE + 1];  }; @@ -119,6 +120,7 @@ struct readv_replies {  typedef struct _stripe_fd_ctx {          off_t      stripe_size;          int        stripe_count; +	int	   stripe_coalesce;          int        static_array;          xlator_t **xl_array;  } stripe_fd_ctx_t; @@ -214,13 +216,41 @@ struct stripe_local {  typedef struct stripe_local   stripe_local_t;  typedef struct stripe_private stripe_private_t; +/* + * Determine the stripe index of a particular frame based on the translator. + */ +static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx, +					     call_frame_t *prev) +{ +	int32_t i, idx = -1; + +	for (i = 0; i < fctx->stripe_count; i++) { +		if (fctx->xl_array[i] == prev->this) { +			idx = i; +			break; +		} +	} + +	return idx; +} + +static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src, +					int count) +{ +	int i; + +	for (i = 0; i < count; i++) +		dst[i] = src[i]; +} +  void stripe_local_wipe (stripe_local_t *local);  int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,                             stripe_local_t *local, dict_t *dict);  void stripe_aggregate_xattr (dict_t *dst, dict_t *src);  int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,                                      uint64_t stripe_size, uint32_t stripe_count, -                                    uint32_t stripe_index); +                                    uint32_t stripe_index, +				    uint32_t stripe_coalesce);  int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);  int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);  int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to); @@ -229,5 +259,27 @@ int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,  int32_t stripe_free_xattr_str (stripe_local_t *local);  int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,                                  int32_t *total); +off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count); +off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count, +			int stripe_index); + +/* + * Adjust the size attribute for files if coalesce is enabled. + */ +static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx, +	call_frame_t *prev) +{ +	int index; + +	if (!IA_ISREG(buf->ia_type)) +		return; + +	if (!fctx || !fctx->stripe_coalesce) +		return; + +	index = stripe_get_frame_index(fctx, prev); +	buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size, +		fctx->stripe_count, index); +}  #endif /* _STRIPE_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 2a97e40522a..a01d2a773eb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -141,6 +141,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {          {"cluster.choose-local",                 "cluster/replicate",  NULL, NULL, DOC, 0},          {"cluster.stripe-block-size",            "cluster/stripe",     "block-size", NULL, DOC, 0}, +	{"cluster.stripe-coalesce",		 "cluster/stripe",     "coalesce", NULL, DOC, 0},          {VKEY_DIAG_LAT_MEASUREMENT,              "debug/io-stats",     "latency-measurement", "off", NO_DOC, 0},          {"diagnostics.dump-fd-stats",            "debug/io-stats",     NULL, NULL, NO_DOC, 0}, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index e18654cc728..46a83ea2f85 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1810,7 +1810,7 @@ out:          STACK_UNWIND_STRICT (create, frame, op_ret, op_errno,                               fd, (loc)?loc->inode:NULL, &stbuf, &preparent, -                             &postparent, NULL); +                             &postparent, xdata);          return 0;  } | 
