From 65cc8cb531456de934e8ba3175430e8fcb304ca5 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Sun, 28 Oct 2012 07:31:20 +0530 Subject: storage/posix: Make rchecksum O_DIRECT friendly Problem: When posix-aio is enabled to perform aio fd is set with O_DIRECT whenever possible in read, writev fops. Rchecksum does not take this into account. If either offset/size/memory-buf passed to pread in rchecksum fop is not aligned, pread fails with EINVAL. Fix: Before doing pread necessary O_DIRECT manipulation is done when aio is enabled. Memory buffer passed to pread is now page-aligned. Test: 1) Create replica volume with aio enabled. 2) dd if=/dev/urandom of=a bs=1M count=1 3) kill one of the bricks in the replica pair 4) dd if=/dev/urandom of=a bs=1M count=1 5) bring back the brick. Self-heal succeeds after the change. The test above checks both rchecksum, writev fops that were changed in this patch. Change-Id: I186099a2854d4864c5b48086ab7bc5f1a7b27313 BUG: 866459 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/4134 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/storage/posix/src/posix-aio.c | 10 +++++ xlators/storage/posix/src/posix.c | 71 +++++++++++++++++++++++------------ xlators/storage/posix/src/posix.h | 4 ++ 3 files changed, 62 insertions(+), 23 deletions(-) (limited to 'xlators/storage/posix') diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c index 0a776b9bcf0..f807618ce1c 100644 --- a/xlators/storage/posix/src/posix-aio.c +++ b/xlators/storage/posix/src/posix-aio.c @@ -560,4 +560,14 @@ posix_aio_off (xlator_t *this) return 0; } +void +__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, + off_t offset, size_t size) +{ + xlator_t *this = THIS; + gf_log (this->name, GF_LOG_INFO, + "Linux AIO not availble at build-time." + " Continuing with synchronous IO"); + return; +} #endif diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 86c35f44f8b..812bfda7dfb 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -52,6 +52,7 @@ #include "posix-aio.h" extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 #undef HAVE_SET_FSID #ifdef HAVE_SET_FSID @@ -2013,6 +2014,21 @@ err: return op_ret; } +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; + + alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} int32_t __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, @@ -2020,7 +2036,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, { int32_t op_ret = 0; int idx = 0; - int align = 4096; int max_buf_size = 0; int retval = 0; char *buf = NULL; @@ -2036,7 +2051,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, max_buf_size = vector[idx].iov_len; } - alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char); + alloc_buf = _page_aligned_alloc (max_buf_size, &buf); if (!alloc_buf) { op_ret = -errno; goto err; @@ -2044,9 +2059,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, internal_off = startoff; for (idx = 0; idx < count; idx++) { - /* page aligned buffer */ - buf = GF_ALIGN_BUF (alloc_buf, align); - memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); /* not sure whether writev works on O_DIRECT'd fd */ @@ -3875,23 +3887,26 @@ int32_t posix_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, int32_t len, dict_t *xdata) { - char *buf = NULL; - int _fd = -1; - struct posix_fd *pfd = NULL; - int op_ret = -1; - int op_errno = 0; - int ret = 0; - int32_t weak_checksum = 0; - unsigned char strong_checksum[MD5_DIGEST_LENGTH]; + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + int32_t weak_checksum = 0; + unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; + struct posix_private *priv = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); + priv = this->private; memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - buf = GF_CALLOC (1, len, gf_posix_mt_char); - if (!buf) { + alloc_buf = _page_aligned_alloc (len, &buf); + if (!alloc_buf) { op_errno = ENOMEM; goto out; } @@ -3906,15 +3921,25 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this, _fd = pfd->fd; - ret = pread (_fd, buf, len, offset); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pread of %d bytes returned %d (%s)", - len, ret, strerror (errno)); + LOCK (&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect (fd, pfd, 0, offset, len); + + ret = pread (_fd, buf, len, offset); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pread of %d bytes returned %d (%s)", + len, ret, strerror (errno)); + + op_errno = errno; + } - op_errno = errno; - goto out; } + UNLOCK (&fd->lock); + + if (ret < 0) + goto out; weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len); gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum); @@ -3924,7 +3949,7 @@ out: STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum, strong_checksum, NULL); - GF_FREE (buf); + GF_FREE (alloc_buf); return 0; } diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 6f96e49d18f..45ee3596330 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -159,4 +159,8 @@ int posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd); void posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf); gf_boolean_t posix_special_xattr (char **pattern, char *key); + +void +__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, + off_t offset, size_t size); #endif /* _POSIX_H */ -- cgit