From 868a0d4c1a2fa0493695abb3e29b6832516ea8de Mon Sep 17 00:00:00 2001 From: Amar Tumballi Date: Wed, 24 Mar 2010 12:52:22 +0000 Subject: stripe: proper fix for reading 'holes' Signed-off-by: Amar Tumballi Signed-off-by: Anand V. Avati BUG: 713 (fsx tool segfaults in readahead and iocache) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=713 --- xlators/cluster/stripe/src/stripe.c | 301 ++++++++++++++++++++---------------- xlators/cluster/stripe/src/stripe.h | 2 +- 2 files changed, 172 insertions(+), 131 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index 97182f9b8e1..009b279b154 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -2758,21 +2758,93 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) } -/** - * stripe_single_readv_cbk - This function is used as return fn, when the - * file name doesn't match the pattern specified for striping. - */ int32_t -stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, - struct stat *stbuf, struct iobref *iobref) +stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) { - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, - count, stbuf, iobref); + int32_t i = 0; + int32_t callcnt = 0; + int32_t count = 0; + stripe_local_t *local = NULL; + struct iovec *vec = NULL; + struct stat tmp_stbuf = {0,}; + struct iobref *tmp_iobref = NULL; + struct iobuf *iobuf = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret != -1) + if (local->stbuf_size < buf->st_size) + local->stbuf_size = buf->st_size; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + op_ret = 0; + + /* Keep extra space for filling in '\0's */ + vec = CALLOC ((local->count * 2), sizeof (struct iovec)); + if (!vec) { + op_ret = -1; + goto done; + } + + for (i = 0; i < local->wind_count; i++) { + memcpy ((vec + count), local->replies[i].vector, + (local->replies[i].count * sizeof (struct iovec))); + count += local->replies[i].count; + op_ret += local->replies[i].op_ret; + + if ((local->replies[i].op_ret < + local->replies[i].requested_size) && + (local->stbuf_size > (local->offset + op_ret))) { + /* Fill in 0s here */ + vec[count].iov_len = + (local->replies[i].requested_size - + local->replies[i].op_ret); + iobuf = iobuf_get (this->ctx->iobuf_pool); + if (!iobuf) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_ret = -1; + op_errno = ENOMEM; + goto done; + } + memset (iobuf->ptr, 0, vec[count].iov_len); + iobref_add (local->iobref, iobuf); + + vec[count].iov_base = iobuf->ptr; + + op_ret += vec[count].iov_len; + count++; + } + FREE (local->replies[i].vector); + } + + /* FIXME: notice that st_ino, and st_dev (gen) will be + * different than what inode will have. Make sure this doesn't + * cause any bugs at higher levels */ + memcpy (&tmp_stbuf, &local->replies[0].stbuf, + sizeof (struct stat)); + tmp_stbuf.st_size = local->stbuf_size; + done: + /* */ + FREE (local->replies); + tmp_iobref = local->iobref; + fd_unref (local->fd); + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vec, + count, &tmp_stbuf, tmp_iobref); + + iobref_unref (tmp_iobref); + if (vec) + FREE (vec); + } + return 0; } - /** * stripe_readv_cbk - get all the striped reads, and order it properly, send it * to above layer after putting it in a single vector. @@ -2784,152 +2856,113 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { int32_t index = 0; int32_t callcnt = 0; - call_frame_t *main_frame = NULL; - stripe_local_t *main_local = NULL; - stripe_local_t *local = frame->local; + int32_t final_count = 0; + int32_t need_to_check_proper_size = 0; + call_frame_t *mframe = NULL; + stripe_local_t *mlocal = NULL; + stripe_local_t *local = NULL; + struct iovec *final_vec = NULL; + struct stat tmp_stbuf = {0,}; + struct iobref *tmp_iobref = NULL; + stripe_fd_ctx_t *fctx = NULL; - index = local->node_index; - main_frame = local->orig_frame; - main_local = main_frame->local; + local = frame->local; + index = local->node_index; + mframe = local->orig_frame; + mlocal = mframe->local; + fctx = mlocal->fctx; - LOCK (&main_frame->lock); + LOCK (&mframe->lock); { - main_local->replies[index].op_ret = op_ret; - main_local->replies[index].op_errno = op_errno; + mlocal->replies[index].op_ret = op_ret; + mlocal->replies[index].op_errno = op_errno; + mlocal->replies[index].requested_size = local->readv_size; if (op_ret >= 0) { - main_local->replies[index].stbuf = *stbuf; - if ((main_local->wind_count >= 2) && - (index < (main_local->wind_count -1)) && - (op_ret < local->readv_size)) { - /* refer to bug :p #536 on bugs.gluster.com */ - main_local->readv_pendingsize = - (local->readv_size - op_ret); - } else { - main_local->replies[index].count = count; - main_local->replies[index].vector = - iov_dup (vector, count); - } + mlocal->replies[index].stbuf = *stbuf; + mlocal->replies[index].count = count; + mlocal->replies[index].vector = iov_dup (vector, count); - if (!main_local->iobref) - main_local->iobref = iobref_new (); - iobref_merge (main_local->iobref, iobref); + if (!mlocal->iobref) + mlocal->iobref = iobref_new (); + iobref_merge (mlocal->iobref, iobref); } - callcnt = ++main_local->call_count; + callcnt = ++mlocal->call_count; } - UNLOCK(&main_frame->lock); - - if (callcnt == main_local->wind_count) { - int32_t final_count = 0; - struct iovec *final_vec = NULL; - struct stat tmp_stbuf = {0,}; - struct iobref *iobref = NULL; + UNLOCK(&mframe->lock); + if (callcnt == mlocal->wind_count) { op_ret = 0; - /* FIXME: notice that st_ino, and st_dev (gen) will be - * different than what inode will have. Make sure this doesn't - * cause any bugs at higher levels */ - memcpy (&tmp_stbuf, &main_local->replies[0].stbuf, - sizeof (struct stat)); - for (index=0; index < main_local->wind_count; index++) { + + for (index=0; index < mlocal->wind_count; index++) { /* check whether each stripe returned * 'expected' number of bytes */ - if (main_local->replies[index].op_ret == -1) { + if (mlocal->replies[index].op_ret == -1) { op_ret = -1; - op_errno = main_local->replies[index].op_errno; + op_errno = mlocal->replies[index].op_errno; break; } - /* ANSWER-ME: Do we need to send anything more in stbuf? - */ - if (tmp_stbuf.st_size < - main_local->replies[index].stbuf.st_size) { - tmp_stbuf.st_size = - main_local->replies[index].stbuf.st_size; + /* TODO: handle the 'holes' within the read range + properly */ + if (mlocal->replies[index].op_ret < + mlocal->replies[index].requested_size) { + need_to_check_proper_size = 1; } - /* TODO: Should I handle a case where there is a hole - * in a read request which spans across two nodes? - * for now, Solving bug(536) without addressing that - * case */ - if ((index < (main_local->wind_count - 1)) && - (main_local->replies[index+1].op_ret > 0) && - (main_local->readv_pendingsize)) { - /* Fill in zeroes */ - struct iovec *tmp_vec = NULL; - struct iobuf *iobuf = NULL; - int tmp_count = 0; - - tmp_count = main_local->replies[index].count; - tmp_vec = CALLOC (1, ((tmp_count + 1) * - sizeof (struct iovec))); - memcpy (tmp_vec, - main_local->replies[index].vector, - sizeof (struct iovec) * tmp_count); - - iobuf = iobuf_get (this->ctx->iobuf_pool); - if (!iobuf) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto done; - } - memset (iobuf->ptr, 0, - main_local->readv_pendingsize); - iobref_add (main_local->iobref, iobuf); - - tmp_vec[tmp_count].iov_base = iobuf->ptr; - tmp_vec[tmp_count].iov_len = - main_local->readv_pendingsize; - - main_local->replies[index].count++; - main_local->replies[index].op_ret += - main_local->readv_pendingsize; - - FREE (main_local->replies[index].vector); - main_local->replies[index].vector = tmp_vec; - } - op_ret += main_local->replies[index].op_ret; - final_count += main_local->replies[index].count; + op_ret += mlocal->replies[index].op_ret; + mlocal->count += mlocal->replies[index].count; } - if (op_ret != -1) { - final_vec = CALLOC (final_count, - sizeof (struct iovec)); - if (!final_vec) { - op_ret = -1; - final_count = 0; - goto done; - } + if (op_ret == -1) + goto done; + if (need_to_check_proper_size) + goto check_size; - final_count = 0; + final_vec = CALLOC (mlocal->count, sizeof (struct iovec)); - for (index=0; - index < main_local->wind_count; index++) { - memcpy (final_vec + final_count, - main_local->replies[index].vector, - (main_local->replies[index].count * - sizeof (struct iovec))); - final_count += - main_local->replies[index].count; + if (!final_vec) { + op_ret = -1; + goto done; + } - free (main_local->replies[index].vector); - } - } else { - final_vec = NULL; - final_count = 0; + for (index = 0; index < mlocal->wind_count; index++) { + memcpy ((final_vec + final_count), + mlocal->replies[index].vector, + (mlocal->replies[index].count * + sizeof (struct iovec))); + final_count += mlocal->replies[index].count; + FREE (mlocal->replies[index].vector); } + /* FIXME: notice that st_ino, and st_dev (gen) will be + * different than what inode will have. Make sure this doesn't + * cause any bugs at higher levels */ + memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf, + sizeof (struct stat)); + done: /* */ - FREE (main_local->replies); - iobref = main_local->iobref; - STACK_UNWIND_STRICT (readv, main_frame, op_ret, op_errno, - final_vec, final_count, &tmp_stbuf, iobref); + FREE (mlocal->replies); + tmp_iobref = mlocal->iobref; + fd_unref (mlocal->fd); + STACK_UNWIND_STRICT (readv, mframe, op_ret, op_errno, final_vec, + final_count, &tmp_stbuf, tmp_iobref); - iobref_unref (iobref); + iobref_unref (tmp_iobref); if (final_vec) FREE (final_vec); - } + goto out; + + check_size: + mlocal->call_count = fctx->stripe_count; + + for (index = 0; index < fctx->stripe_count; index++) { + STACK_WIND (mframe, stripe_readv_fstat_cbk, + (fctx->xl_array[index]), + (fctx->xl_array[index])->fops->fstat, + mlocal->fd); + } + } +out: STACK_DESTROY (frame->root); return 0; } @@ -2984,7 +3017,6 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, op_errno = ENOMEM; goto err; } - local->wind_count = num_stripe; frame->local = local; /* This is where all the vectors should be copied. */ @@ -2995,6 +3027,12 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, } off_index = (offset / stripe_size) % fctx->stripe_count; + local->wind_count = num_stripe; + local->readv_size = size; + local->offset = offset; + local->fd = fd_ref (fd); + local->fctx = fctx; + local->entry_count = off_index % fctx->stripe_count; for (index = off_index; index < (num_stripe + off_index); index++) { rframe = copy_frame (frame); @@ -3021,6 +3059,9 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; err: + if (local->fd) + fd_unref (local->fd); + STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); return 0; } diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h index 614fb7a08ee..8a92e43da9f 100644 --- a/xlators/cluster/stripe/src/stripe.h +++ b/xlators/cluster/stripe/src/stripe.h @@ -69,6 +69,7 @@ struct readv_replies { int32_t count; //count of vector int32_t op_ret; //op_ret of readv int32_t op_errno; + int32_t requested_size; struct stat stbuf; /* 'stbuf' is also a part of reply */ }; @@ -120,7 +121,6 @@ struct stripe_local { int8_t unwind; size_t readv_size; - int32_t readv_pendingsize; int32_t entry_count; int32_t node_index; int32_t call_count; -- cgit