diff options
| author | Jeff Darcy <jdarcy@redhat.com> | 2012-01-19 17:49:42 -0500 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2012-01-30 05:09:35 -0800 | 
| commit | c3aa99d907591f72b6302287b9b8899514fb52f1 (patch) | |
| tree | 40dceb4deb7e1987caf4004eb76ead91ef4e6fd6 | |
| parent | 20d74c540879d3994d56b9baf7044c79ae5df5e3 (diff) | |
Fix race between read-ahead and write.
Change-Id: I0ed1aca585733302b5e3840f392849e12f0b0f0d
BUG: 783313
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.com/2666
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@gluster.com>
| -rw-r--r-- | xlators/performance/read-ahead/src/page.c | 31 | ||||
| -rw-r--r-- | xlators/performance/read-ahead/src/read-ahead.c | 60 | ||||
| -rw-r--r-- | xlators/performance/read-ahead/src/read-ahead.h | 3 | 
3 files changed, 68 insertions, 26 deletions
diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c index 9778ef54258..0c9a61853c8 100644 --- a/xlators/performance/read-ahead/src/page.c +++ b/xlators/performance/read-ahead/src/page.c @@ -175,14 +175,8 @@ ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  if (op_ret >= 0)                          file->stbuf = *stbuf; -                if (op_ret < 0) { -                        page = ra_page_get (file, pending_offset); -                        if (page) -                                waitq = ra_page_error (page, op_ret, op_errno); -                        goto unlock; -                } -                  page = ra_page_get (file, pending_offset); +                  if (!page) {                          gf_log (this->name, GF_LOG_TRACE,                                  "wasted copy: %"PRId64"[+%"PRId64"] file=%p", @@ -190,6 +184,29 @@ ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          goto unlock;                  } +                /* +                 * "Dirty" means that the request was a pure read-ahead; it's +                 * set for requests we issue ourselves, and cleared when user +                 * requests are issued or put on the waitq.  "Poisoned" means +                 * that we got a write while a read was still in flight, and we +                 * couldn't stop it so we marked it instead.  If it's both +                 * dirty and poisoned by the time we get here, we cancel its +                 * effect so that a subsequent user read doesn't get data that +                 * we know is stale (because we made it stale ourselves).  We +                 * can't use ESTALE because that has special significance. +                 * ECANCELED has no such special meaning, and is close to what +                 * we're trying to indicate. +                 */ +                if (page->dirty && page->poisoned) { +                        op_ret = -1; +                        op_errno = ECANCELED; +                } + +                if (op_ret < 0) { +                        waitq = ra_page_error (page, op_ret, op_errno); +                        goto unlock; +                } +                  if (page->vector) {                          iobref_unref (page->iobref);                          GF_FREE (page->vector); diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index 37f34f2eb91..928d8b7bdbc 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -231,7 +231,8 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,  */  static void -flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size) +flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size, +              int for_write)  {          ra_page_t *trav = NULL;          ra_page_t *next = NULL; @@ -243,8 +244,13 @@ flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size)                         && trav->offset < (offset + size)) {                          next = trav->next; -                        if (trav->offset >= offset && !trav->waitq) { -                                ra_page_purge (trav); +                        if (trav->offset >= offset) { +                                if (!trav->waitq) { +                                        ra_page_purge (trav); +                                } +                                else if (for_write) { +                                        trav->poisoned = 1; +                                }                          }                          trav = next;                  } @@ -392,15 +398,15 @@ dispatch_requests (call_frame_t *frame, ra_file_t *file)                          trav = ra_page_get (file, trav_offset);                          if (!trav) {                                  trav = ra_page_create (file, trav_offset); +                                if (!trav) { +                                        local->op_ret = -1; +                                        local->op_errno = ENOMEM; +                                        goto unlock; +                                }                                  fault = 1;                                  need_atime_update = 0;                          } - -                        if (!trav) { -                                local->op_ret = -1; -                                local->op_errno = ENOMEM; -                                goto unlock; -                        } +                        trav->dirty = 0;                          if (trav->ready) {                                  gf_log (frame->this->name, GF_LOG_TRACE, @@ -513,7 +519,7 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,          }          if (!expected_offset) { -                flush_region (frame, file, 0, file->pages.prev->offset + 1); +                flush_region (frame, file, 0, file->pages.prev->offset + 1, 0);          }          local = (void *) GF_CALLOC (1, sizeof (*local), gf_ra_mt_ra_local_t); @@ -536,7 +542,7 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,          dispatch_requests (frame, file); -        flush_region (frame, file, 0, floor (offset, file->page_size)); +        flush_region (frame, file, 0, floor (offset, file->page_size), 0);          read_ahead (frame, file); @@ -596,7 +602,7 @@ ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)          file = (ra_file_t *)(long)tmp_file;          if (file) { -                flush_region (frame, file, 0, file->pages.prev->offset+1); +                flush_region (frame, file, 0, file->pages.prev->offset+1, 0);          }          STACK_WIND (frame, ra_flush_cbk, FIRST_CHILD (this), @@ -624,7 +630,7 @@ ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)          file = (ra_file_t *)(long)tmp_file;          if (file) { -                flush_region (frame, file, 0, file->pages.prev->offset+1); +                flush_region (frame, file, 0, file->pages.prev->offset+1, 0);          }          STACK_WIND (frame, ra_fsync_cbk, FIRST_CHILD (this), @@ -649,7 +655,7 @@ ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          file = frame->local;          if (file) { -                flush_region (frame, file, 0, file->pages.prev->offset+1); +                flush_region (frame, file, 0, file->pages.prev->offset+1, 1);          }          frame->local = NULL; @@ -673,7 +679,7 @@ ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,          fd_ctx_get (fd, this, &tmp_file);          file = (ra_file_t *)(long)tmp_file;          if (file) { -                flush_region (frame, file, 0, file->pages.prev->offset+1); +                flush_region (frame, file, 0, file->pages.prev->offset+1, 1);                  frame->local = file;                  /* reset the read-ahead counters too */                  file->expected = file->page_count = 0; @@ -739,8 +745,16 @@ ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)                          if (!file)                                  continue; +                        /* +                         * Truncation invalidates reads just like writing does. +                         * TBD: this seems to flush more than it should.  The +                         * only time we should flush at all is when we're +                         * shortening (not lengthening) the file, and then only +                         * from new EOF to old EOF.  The same problem exists in +                         * ra_ftruncate. +                         */                          flush_region (frame, file, 0, -                                      file->pages.prev->offset + 1); +                                      file->pages.prev->offset + 1, 1);                  }          }          UNLOCK (&inode->lock); @@ -775,6 +789,8 @@ ra_page_dump (struct ra_page *page)          gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no"); +        gf_proc_dump_write ("poisoned", "%s", page->poisoned ? "yes" : "no"); +          gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");          for (trav = page->waitq; trav; trav = trav->next) { @@ -870,7 +886,7 @@ ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)                          if (!file)                                  continue;                          flush_region (frame, file, 0, -                                      file->pages.prev->offset + 1); +                                      file->pages.prev->offset + 1, 0);                  }          }          UNLOCK (&inode->lock); @@ -907,8 +923,16 @@ ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)                          file = (ra_file_t *)(long)tmp_file;                          if (!file)                                  continue; +                        /* +                         * Truncation invalidates reads just like writing does. +                         * TBD: this seems to flush more than it should.  The +                         * only time we should flush at all is when we're +                         * shortening (not lengthening) the file, and then only +                         * from new EOF to old EOF.  The same problem exists in +                         * ra_truncate. +                         */                          flush_region (frame, file, 0, -                                      file->pages.prev->offset + 1); +                                      file->pages.prev->offset + 1, 1);                  }          }          UNLOCK (&inode->lock); diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h index d0bbcde810f..f5e73cb6697 100644 --- a/xlators/performance/read-ahead/src/read-ahead.h +++ b/xlators/performance/read-ahead/src/read-ahead.h @@ -76,7 +76,8 @@ struct ra_page {          struct ra_page   *next;          struct ra_page   *prev;          struct ra_file   *file; -        char              dirty; +        char              dirty;    /* Internal request, not from user. */ +        char              poisoned; /* Pending read invalidated by write. */          char              ready;          struct iovec     *vector;          int32_t           count;  | 
