summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrutika Dhananjay <kdhananj@redhat.com>2017-01-17 16:40:04 +0530
committerRaghavendra G <rgowdapp@redhat.com>2018-08-18 07:28:53 +0000
commitc9bde3021202f1d5c5a2d19ac05a510fc1f788ac (patch)
treeeba54e5f791b33d079afae9655d5438d007c3091
parent045d70a5450daa85aa5564b6e9f93065c342ab12 (diff)
performance/readdir-ahead: keep stats of cached dentries in sync with modifications
PROBLEM: Stats of dentries that are readdirp'd ahead can become stale due to fops like writes, truncate etc. that modify the file pointed to by the dentries. When a readdir is finally wound at the offset corresponding to these entries, the iatts that are returned to the application come from readdir-ahead's cache, which are stale by now. This problem gets further aggravated when caching translators/modules cache and continue to serve this stale information. FIX: * Store the iatt in the context of the inode pointed to by the dentry. * Whenever the inode pointed to by the dentry undergoes modification, in the cbk of the modification fop, update the iatt stored in inode-ctx to reflect the modification. * When serving a readdirp response to the application, update iatts of dentries with the iatts stored in the context of the inodes pointed to by these dentries. * Some fops don't have valid iatts in their responses. For example, a write response whose data is still cached in write-behind will have a zeroed out stat. In this case keep only ia_type and ia_gfid and reset the rest of the iatt members to zero. - fuse-bridge in this case just sends "entry" information back to the kernel and attr is not sent. - gfapi sets entry->inode to NULL and zeroes out the entire stat * There is one tiny race between the entry creation and a readdirp on its parent dir, which could cause the inode-ctx setting and inode-ctx reading to happen on two different inode objects. To prevent this, when entry->inode doesn't equal linked_inode, - fuse-bridge is made to send only "entry" information without attributes - gfapi sets entry->inode to NULL and zeroes out the entire stat. Change-Id: Ia27ff49a61922e88c73a1547ad8aacc9968a69df BUG: 1390050 Updates: bz#1390050 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r--libglusterfs/src/inode.h1
-rw-r--r--tests/basic/afr/split-brain-healing.t1
-rw-r--r--tests/bugs/readdir-ahead/bug-1390050.c70
-rw-r--r--tests/bugs/readdir-ahead/bug-1390050.t29
-rw-r--r--xlators/cluster/dht/src/dht-common.c4
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c17
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h1
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c573
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h52
9 files changed, 721 insertions, 27 deletions
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
index ffee2a5516a..96f67c05629 100644
--- a/libglusterfs/src/inode.h
+++ b/libglusterfs/src/inode.h
@@ -260,6 +260,7 @@ inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
#define inode_ctx_get(i,x,v) inode_ctx_get0(i,x,v)
#define inode_ctx_del(i,x,v) inode_ctx_del2(i,x,v,0)
+/* Counterpart of inode_ctx_del(): forwards v as the last argument of
+ * inode_ctx_del2() and passes 0 in place of the first value pointer. */
+#define inode_ctx_del1(i, x, v) inode_ctx_del2(i, x, 0, v)
gf_boolean_t
__is_root_gfid (uuid_t gfid);
diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
index 773a8b33b12..c80f900b909 100644
--- a/tests/basic/afr/split-brain-healing.t
+++ b/tests/basic/afr/split-brain-healing.t
@@ -76,7 +76,6 @@ do
done
BIGGER_FILE_SIZE=$(stat -c %s file1)
-
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
diff --git a/tests/bugs/readdir-ahead/bug-1390050.c b/tests/bugs/readdir-ahead/bug-1390050.c
new file mode 100644
index 00000000000..5593a1d4c0c
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1390050.c
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * Regression helper for bz#1390050.
+ *
+ * Usage: <prog> <directory> <file-inside-directory>
+ *
+ * Opens the directory, records the size of the file, writes to the file,
+ * drains the directory with readdir() and then stats the file again.  The
+ * size seen after the readdir() loop must differ from the size recorded
+ * before the write.
+ *
+ * Returns 0 on success and -1 on any failure (bad usage, failed syscall,
+ * or unchanged file size).
+ */
+int
+main (int argc, char *argv[])
+{
+        /* One buffer for both the payload and its length, so the byte count
+         * handed to write() can never exceed the object being written. */
+        const char   data[]            = "testdata123";
+        const char  *glfs_dir          = NULL;
+        const char  *filepath          = NULL;
+        DIR         *dirfd             = NULL;
+        int          filefd            = -1;
+        int          ret               = -1;
+        ssize_t      written           = 0;
+        struct stat  stbuf             = {0, };
+        off_t        size_before_write = 0;
+
+        if (argc < 3) {
+                fprintf (stderr, "usage: %s <directory> <file>\n",
+                         (argc > 0) ? argv[0] : "bug-1390050");
+                goto err;
+        }
+
+        glfs_dir = argv[1];
+        filepath = argv[2];
+
+        dirfd = opendir (glfs_dir);
+        if (dirfd == NULL) {
+                fprintf (stderr, "opening directory failed (%s)\n",
+                         strerror (errno));
+                goto err;
+        }
+
+        filefd = open (filepath, O_RDWR);
+        if (filefd < 0) {
+                fprintf (stderr, "open failed on path %s (%s)\n", filepath,
+                         strerror (errno));
+                goto err;
+        }
+
+        if (stat (filepath, &stbuf) < 0) {
+                fprintf (stderr, "stat failed on path %s (%s)\n", filepath,
+                         strerror (errno));
+                goto err;
+        }
+
+        size_before_write = stbuf.st_size;
+
+        /* sizeof (data) includes the terminating NUL: 12 bytes, the same
+         * count the original computed with strlen ("testdata123") + 1. */
+        written = write (filefd, data, sizeof (data));
+        if (written <= 0) {
+                fprintf (stderr, "write failed (%s)\n", strerror (errno));
+                goto err;
+        }
+
+        while (readdir (dirfd)) {
+                /* do nothing */
+        }
+
+        if (stat (filepath, &stbuf) < 0) {
+                fprintf (stderr, "stat failed on path %s (%s)\n", filepath,
+                         strerror (errno));
+                goto err;
+        }
+
+        if (stbuf.st_size == size_before_write) {
+                fprintf (stderr, "file size (%lld) has not changed even after "
+                         "its written to\n", (long long) stbuf.st_size);
+                goto err;
+        }
+
+        ret = 0;
+err:
+        /* Single exit path: release whatever was acquired. */
+        if (filefd >= 0)
+                close (filefd);
+        if (dirfd != NULL)
+                closedir (dirfd);
+
+        return ret;
+}
diff --git a/tests/bugs/readdir-ahead/bug-1390050.t b/tests/bugs/readdir-ahead/bug-1390050.t
new file mode 100644
index 00000000000..ab1d7d4ead9
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1390050.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Regression test for bz#1390050: builds and runs bug-1390050.c against a
+# mount with readdir-ahead enabled and a long md-cache timeout.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+
+# Two-brick volume ($B{0..1}) with readdir-ahead switched on.
+TEST $CLI volume create $V0 $H0:$B{0..1}/$V0
+TEST $CLI volume set $V0 readdir-ahead on
+
+DIRECTORY="$M0/subdir1/subdir2"
+
+#Make sure md-cache has large timeout to hold stat from readdirp_cbk in its cache
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+# Start from an empty mount and an empty test directory, then populate it
+# with file0 .. file10.
+rm -rf $M0/*
+TEST mkdir -p $DIRECTORY
+rm -rf $DIRECTORY/*
+TEST touch $DIRECTORY/file{0..10}
+# Build the C helper next to this script, run it on the directory and one of
+# its files, then remove the binary.
+rdd_tester=$(dirname $0)/rdd-tester
+TEST build_tester $(dirname $0)/bug-1390050.c -o $rdd_tester
+TEST $rdd_tester $DIRECTORY $DIRECTORY/file4
+rm -f $rdd_tester
+cleanup;
+
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index afec1889071..055c92fcbfc 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -6336,9 +6336,9 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
if (local->fop == GF_FOP_REMOVEXATTR) {
- DHT_STACK_UNWIND (removexattr, frame, op_ret, op_errno, NULL);
+ DHT_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
} else {
- DHT_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ DHT_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
}
return 0;
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 20f015431a0..ee50329f03b 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -3056,6 +3056,10 @@ fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!linked_inode)
goto next_entry;
+ if (entry->inode != linked_inode) {
+ memset (&entry->d_stat, 0, sizeof (entry->d_stat));
+ }
+
feo->nodeid = inode_to_fuse_nodeid (linked_inode);
if (!((strcmp (entry->d_name, ".") == 0) ||
@@ -3069,10 +3073,15 @@ fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
calc_timeout_sec (priv->entry_timeout);
feo->entry_valid_nsec =
calc_timeout_nsec (priv->entry_timeout);
- feo->attr_valid =
- calc_timeout_sec (priv->attribute_timeout);
- feo->attr_valid_nsec =
- calc_timeout_nsec (priv->attribute_timeout);
+
+ if (entry->d_stat.ia_ctime) {
+ feo->attr_valid =
+ calc_timeout_sec (priv->attribute_timeout);
+ feo->attr_valid_nsec =
+ calc_timeout_nsec (priv->attribute_timeout);
+ } else {
+ feo->attr_valid = feo->attr_valid_nsec = 0;
+ }
next_entry:
if (size == max_size)
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
index 39e2c536975..ae533840c7e 100644
--- a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
@@ -18,6 +18,7 @@ enum gf_rda_mem_types_ {
gf_rda_mt_rda_local = gf_common_mt_end + 1,
gf_rda_mt_rda_fd_ctx,
gf_rda_mt_rda_priv,
+ gf_rda_mt_inode_ctx_t,
gf_rda_mt_end
};
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
index cb2e73f9d4d..72ab64c5974 100644
--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
@@ -33,6 +33,17 @@
#include "readdir-ahead-messages.h"
static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *);
+/*
+ * Drop every reference held by a struct rda_local (fd, xattrs dict, inode).
+ *
+ * The structure itself is not freed here; callers release it separately
+ * (RDA_STACK_UNWIND does mem_put() after calling this).
+ *
+ * @local: may be NULL.  Callers hand in frame->local directly, including on
+ *         error paths, so a NULL value is tolerated and is a no-op.
+ */
+static void
+rda_local_wipe (struct rda_local *local)
+{
+        if (!local)
+                return;
+
+        if (local->fd) {
+                fd_unref (local->fd);
+                local->fd = NULL;
+        }
+
+        if (local->xattrs) {
+                dict_unref (local->xattrs);
+                local->xattrs = NULL;
+        }
+
+        if (local->inode) {
+                inode_unref (local->inode);
+                local->inode = NULL;
+        }
+}
+
/*
* Get (or create) the fd context for storing prepopulated directory
* entries.
@@ -70,6 +81,102 @@ out:
return ctx;
}
+/*
+ * Return the readdir-ahead context stored on @inode for xlator @this,
+ * allocating and attaching a zeroed one (generation 0) when
+ * __inode_ctx_get1() reports that none is set.
+ *
+ * Uses the double-underscore inode-ctx accessors; the callers visible in
+ * this file invoke it with inode->lock held.
+ *
+ * Returns the context, or NULL if allocation or attaching it failed.
+ */
+static rda_inode_ctx_t *
+__rda_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        int              ret      = -1;
+        uint64_t         ctx_uint = 0;
+        rda_inode_ctx_t *ctx_p    = NULL;
+
+        ret = __inode_ctx_get1 (inode, this, &ctx_uint);
+        if (ret == 0)
+                /* go through a pointer-sized integer so the conversion is
+                 * well-defined where pointers are narrower than 64 bits */
+                return (rda_inode_ctx_t *)(unsigned long)ctx_uint;
+
+        ctx_p = GF_CALLOC (1, sizeof (*ctx_p), gf_rda_mt_inode_ctx_t);
+        if (!ctx_p)
+                return NULL;
+
+        GF_ATOMIC_INIT (ctx_p->generation, 0);
+
+        /* Store the pointer value in a real uint64_t.  Passing
+         * (uint64_t *)&ctx_p would make the callee read 8 bytes from a
+         * pointer-sized object. */
+        ctx_uint = (uint64_t)(unsigned long)ctx_p;
+
+        ret = __inode_ctx_set1 (inode, this, &ctx_uint);
+        if (ret < 0) {
+                GF_FREE (ctx_p);
+                return NULL;
+        }
+
+        return ctx_p;
+}
+
+/*
+ * Merge @stbuf_in into the iatt cached in @inode's readdir-ahead context.
+ * The locked wrapper rda_inode_ctx_update_iatts() calls this with
+ * inode->lock held.
+ *
+ * @stbuf_in:   stat produced by a fop; NULL or ia_ctime == 0 means "no valid
+ *              stat", which invalidates the cached iatt.
+ * @stbuf_out:  if non-NULL, receives a copy of the cached iatt as it stands
+ *              when this function finishes.
+ * @generation: generation the caller observed earlier; consulted only when
+ *              the cached iatt is currently invalidated (ia_ctime == 0).
+ *
+ * Returns 0, or -1 if the inode context could not be obtained.
+ */
+static int
+__rda_inode_ctx_update_iatts (inode_t *inode, xlator_t *this,
+ struct iatt *stbuf_in, struct iatt *stbuf_out,
+ uint64_t generation)
+{
+ rda_inode_ctx_t *ctx_p = NULL;
+ struct iatt tmp_stat = {0, };
+
+ ctx_p = __rda_inode_ctx_get (inode, this);
+ if (!ctx_p)
+ return -1;
+
+ if ((!stbuf_in) || (stbuf_in->ia_ctime == 0)) {
+ /* A fop modified a file but valid stbuf is not provided.
+ * Can't update iatt to reflect results of fop and hence
+ * invalidate the iatt stored in dentry.
+ *
+ * An example of this case can be response of write request
+ * that is cached in write-behind.
+ */
+ /* Zero the cached stat but keep ia_gfid and ia_type, then bump the
+ * generation. */
+ tmp_stat = ctx_p->statbuf;
+ memset (&ctx_p->statbuf, 0,
+ sizeof (ctx_p->statbuf));
+ gf_uuid_copy (ctx_p->statbuf.ia_gfid,
+ tmp_stat.ia_gfid);
+ ctx_p->statbuf.ia_type = tmp_stat.ia_type;
+ GF_ATOMIC_INC (ctx_p->generation);
+ } else {
+ if (ctx_p->statbuf.ia_ctime) {
+ /* Cached stat is valid: skip the update when the incoming ctime
+ * is older (seconds first, then nanoseconds). */
+ if (stbuf_in->ia_ctime < ctx_p->statbuf.ia_ctime) {
+ goto out;
+ }
+
+ if ((stbuf_in->ia_ctime == ctx_p->statbuf.ia_ctime) &&
+ (stbuf_in->ia_ctime_nsec
+ < ctx_p->statbuf.ia_ctime_nsec)) {
+ goto out;
+ }
+ } else {
+ /* Cached stat is invalidated: accept the incoming stat only if
+ * the caller's generation equals the current one. */
+ if (generation != GF_ATOMIC_GET (ctx_p->generation))
+ goto out;
+ }
+
+ ctx_p->statbuf = *stbuf_in;
+ }
+
+out:
+ /* Report whatever is cached now, whether or not it was updated. */
+ if (stbuf_out)
+ *stbuf_out = ctx_p->statbuf;
+
+ return 0;
+}
+
+/*
+ * Locked front-end for __rda_inode_ctx_update_iatts(): takes inode->lock,
+ * performs the update and returns its result unchanged.
+ */
+static int
+rda_inode_ctx_update_iatts (inode_t *inode, xlator_t *this,
+                            struct iatt *stbuf_in, struct iatt *stbuf_out,
+                            uint64_t generation)
+{
+        int status = -1;
+
+        LOCK (&inode->lock);
+        status = __rda_inode_ctx_update_iatts (inode, this, stbuf_in,
+                                               stbuf_out, generation);
+        UNLOCK (&inode->lock);
+
+        return status;
+}
+
/*
* Reset the tracking state of the context.
*/
@@ -112,6 +219,27 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
return _gf_false;
}
+/*
+ * Copy the iatt cached in @inode's readdir-ahead context into @attr.
+ *
+ * Does nothing when any argument is NULL; @attr is left untouched when the
+ * context cannot be obtained.
+ */
+void
+rda_inode_ctx_get_iatt (inode_t *inode, xlator_t *this, struct iatt *attr)
+{
+        rda_inode_ctx_t *cached = NULL;
+
+        if (inode == NULL || this == NULL || attr == NULL)
+                return;
+
+        LOCK (&inode->lock);
+        cached = __rda_inode_ctx_get (inode, this);
+        if (cached != NULL)
+                *attr = cached->statbuf;
+        UNLOCK (&inode->lock);
+}
+
/*
* Serve a request from the fd dentry list based on the size of the request
* buffer. ctx must be locked.
@@ -124,6 +252,7 @@ __rda_fill_readdirp (xlator_t *this, gf_dirent_t *entries, size_t request_size,
size_t dirent_size, size = 0;
int32_t count = 0;
struct rda_priv *priv = NULL;
+ struct iatt tmp_stat = {0,};
priv = this->private;
@@ -132,6 +261,13 @@ __rda_fill_readdirp (xlator_t *this, gf_dirent_t *entries, size_t request_size,
if (size + dirent_size > request_size)
break;
+ memset (&tmp_stat, 0, sizeof (tmp_stat));
+
+ if (dirent->inode) {
+ rda_inode_ctx_get_iatt (dirent->inode, this, &tmp_stat);
+ dirent->d_stat = tmp_stat;
+ }
+
size += dirent_size;
list_del_init(&dirent->list);
ctx->cur_size -= dirent_size;
@@ -319,6 +455,17 @@ rda_fill_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
list_del_init(&dirent->list);
/* must preserve entry order */
list_add_tail(&dirent->list, &ctx->entries.list);
+ if (dirent->inode) {
+ /* If ctxp->stat is invalidated, don't update it
+ * with dirent->d_stat as we don't have
+ * generation number of the inode when readdirp
+ * request was initiated. So, we pass 0 for
+ * generation number
+ */
+ rda_inode_ctx_update_iatts (dirent->inode, this,
+ &dirent->d_stat,
+ &dirent->d_stat, 0);
+ }
dirent_size = gf_dirent_size (dirent->d_name);
@@ -379,6 +526,7 @@ out:
ctx->xattrs = NULL;
}
+ rda_local_wipe (ctx->fill_frame->local);
STACK_DESTROY(ctx->fill_frame->root);
ctx->fill_frame = NULL;
}
@@ -444,7 +592,7 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
}
local->ctx = ctx;
- local->fd = fd;
+ local->fd = fd_ref (fd);
nframe->local = local;
ctx->fill_frame = nframe;
@@ -469,8 +617,10 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
return 0;
err:
- if (nframe)
+ if (nframe) {
+ rda_local_wipe (nframe->local);
FRAME_DESTROY(nframe);
+ }
return -1;
}
@@ -479,24 +629,10 @@ static int32_t
rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- struct rda_local *local = frame->local;
-
if (!op_ret)
rda_fill_fd(frame, this, fd);
- frame->local = NULL;
-
- STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
-
- if (local && local->xattrs) {
- /* unref for dict_new() done in rda_opendir */
- dict_unref (local->xattrs);
- local->xattrs = NULL;
- }
-
- if (local)
- mem_put (local);
-
+ RDA_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -540,6 +676,374 @@ unwind:
}
+/*
+ * writev callback: on success, fold @postbuf into the stat cached on the
+ * written inode and unwind with the resulting stat; on failure unwind with
+ * a zeroed post-op stat.
+ */
 static int32_t
+rda_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                struct iatt *postbuf, dict_t *xdata)
+{
+        struct iatt       synced_stat = {0,};
+        struct rda_local *fop_local   = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, postbuf,
+                                            &synced_stat,
+                                            fop_local->generation);
+
+                /* no usable ctime: hand back a fully zeroed stat */
+                if (!synced_stat.ia_ctime)
+                        memset (&synced_stat, 0, sizeof (synced_stat));
+        }
+
+        RDA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf,
+                          &synced_stat, xdata);
+        return 0;
+}
+
+/*
+ * writev fop: winds to the first child through RDA_COMMON_MODIFICATION_FOP,
+ * tracking fd->inode so that rda_writev_cbk can refresh the stat cached for
+ * that inode. Always returns 0.
+ */
+static int32_t
+rda_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (writev, frame, this, fd->inode, xdata, fd,
+ vector, count, off, flags, iobref);
+ return 0;
+}
+
+/*
+ * fallocate callback: on success, fold @postbuf into the stat cached on the
+ * inode and unwind with the resulting stat; on failure unwind with a zeroed
+ * post-op stat.
+ */
+static int32_t
+rda_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        struct iatt       synced_stat = {0,};
+        struct rda_local *fop_local   = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, postbuf,
+                                            &synced_stat,
+                                            fop_local->generation);
+
+                /* no usable ctime: hand back a fully zeroed stat */
+                if (!synced_stat.ia_ctime)
+                        memset (&synced_stat, 0, sizeof (synced_stat));
+        }
+
+        RDA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf,
+                          &synced_stat, xdata);
+        return 0;
+}
+
+/*
+ * fallocate fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking fd->inode so that rda_fallocate_cbk
+ * can refresh the stat cached for that inode. Always returns 0.
+ */
+static int32_t
+rda_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (fallocate, frame, this, fd->inode, xdata,
+ fd, keep_size, offset, len);
+ return 0;
+}
+
+/*
+ * zerofill callback: on success, fold @postbuf into the stat cached on the
+ * inode and unwind with the resulting stat; on failure unwind with a zeroed
+ * post-op stat.
+ */
+static int32_t
+rda_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        struct iatt       synced_stat = {0,};
+        struct rda_local *fop_local   = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, postbuf,
+                                            &synced_stat,
+                                            fop_local->generation);
+
+                /* no usable ctime: hand back a fully zeroed stat */
+                if (!synced_stat.ia_ctime)
+                        memset (&synced_stat, 0, sizeof (synced_stat));
+        }
+
+        RDA_STACK_UNWIND (zerofill, frame, op_ret, op_errno, prebuf,
+                          &synced_stat, xdata);
+        return 0;
+}
+
+/*
+ * zerofill fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking fd->inode so that rda_zerofill_cbk
+ * can refresh the stat cached for that inode. Always returns 0.
+ */
+static int32_t
+rda_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (zerofill, frame, this, fd->inode, xdata,
+ fd, offset, len);
+ return 0;
+}
+
+/*
+ * discard callback: on success, fold @postbuf into the stat cached on the
+ * inode and unwind with the resulting stat; on failure unwind with a zeroed
+ * post-op stat.
+ */
+static int32_t
+rda_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+        struct iatt       synced_stat = {0,};
+        struct rda_local *fop_local   = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, postbuf,
+                                            &synced_stat,
+                                            fop_local->generation);
+
+                /* no usable ctime: hand back a fully zeroed stat */
+                if (!synced_stat.ia_ctime)
+                        memset (&synced_stat, 0, sizeof (synced_stat));
+        }
+
+        RDA_STACK_UNWIND (discard, frame, op_ret, op_errno, prebuf,
+                          &synced_stat, xdata);
+        return 0;
+}
+
+/*
+ * discard fop: winds to the first child through RDA_COMMON_MODIFICATION_FOP,
+ * tracking fd->inode so that rda_discard_cbk can refresh the stat cached for
+ * that inode. Always returns 0.
+ */
+static int32_t
+rda_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (discard, frame, this, fd->inode, xdata,
+ fd, offset, len);
+ return 0;
+}
+
+/*
+ * ftruncate callback: on success, fold @postbuf into the stat cached on the
+ * inode and unwind with the resulting stat; on failure unwind with a zeroed
+ * post-op stat.
+ */
+static int32_t
+rda_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                   struct iatt *postbuf, dict_t *xdata)
+{
+        struct iatt       synced_stat = {0,};
+        struct rda_local *fop_local   = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, postbuf,
+                                            &synced_stat,
+                                            fop_local->generation);
+
+                /* no usable ctime: hand back a fully zeroed stat */
+                if (!synced_stat.ia_ctime)
+                        memset (&synced_stat, 0, sizeof (synced_stat));
+        }
+
+        RDA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
+                          &synced_stat, xdata);
+        return 0;
+}
+
+/*
+ * ftruncate fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking fd->inode so that rda_ftruncate_cbk
+ * can refresh the stat cached for that inode. Always returns 0.
+ */
+static int32_t
+rda_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (ftruncate, frame, this, fd->inode, xdata,
+ fd, offset);
+ return 0;
+}
+
+/*
+ * truncate callback: on success, fold @postbuf into the stat cached on the
+ * truncated inode and unwind with the resulting stat; on failure unwind with
+ * a zeroed post-op stat.
+ *
+ * Unwinds as "truncate" (the fop that was wound by rda_truncate), not
+ * "ftruncate".
+ */
+static int32_t
+rda_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+        struct rda_local *local = NULL;
+        struct iatt postbuf_out = {0,};
+
+        if (op_ret < 0)
+                goto unwind;
+
+        local = frame->local;
+        rda_inode_ctx_update_iatts (local->inode, this, postbuf, &postbuf_out,
+                                    local->generation);
+
+        /* no usable ctime: hand back a fully zeroed stat */
+        if (postbuf_out.ia_ctime == 0)
+                memset (&postbuf_out, 0, sizeof (postbuf_out));
+
+unwind:
+        RDA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+                          &postbuf_out, xdata);
+        return 0;
+}
+
+/*
+ * truncate fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking loc->inode so that rda_truncate_cbk
+ * can refresh the stat cached for that inode. Always returns 0.
+ */
+static int32_t
+rda_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (truncate, frame, this, loc->inode, xdata,
+ loc, offset);
+ return 0;
+}
+
+/*
+ * setxattr callback: the response carries no stat, so on success the stat
+ * cached on the inode is invalidated (NULL stat passed to the updater)
+ * before unwinding.
+ */
+static int32_t
+rda_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        struct rda_local *fop_local = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, NULL, NULL,
+                                            fop_local->generation);
+        }
+
+        RDA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * setxattr fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking loc->inode so that rda_setxattr_cbk
+ * can invalidate the stat cached for that inode. Always returns 0.
+ */
+static int32_t
+rda_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (setxattr, frame, this, loc->inode,
+ xdata, loc, dict, flags);
+ return 0;
+}
+
+/*
+ * fsetxattr callback: the response carries no stat, so on success the stat
+ * cached on the inode is invalidated (NULL stat passed to the updater)
+ * before unwinding.
+ */
+static int32_t
+rda_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        struct rda_local *fop_local = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, NULL, NULL,
+                                            fop_local->generation);
+        }
+
+        RDA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * fsetxattr fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking fd->inode so that rda_fsetxattr_cbk
+ * can invalidate the stat cached for that inode. Always returns 0.
+ */
+static int32_t
+rda_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (fsetxattr, frame, this, fd->inode,
+ xdata, fd, dict, flags);
+ return 0;
+}
+
+/*
+ * setattr callback: on success, fold @statpost into the stat cached on the
+ * inode and unwind with the resulting stat; on failure unwind with a zeroed
+ * post-op stat.
+ */
+static int32_t
+rda_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                 struct iatt *statpost, dict_t *xdata)
+{
+        struct iatt       synced_stat = {0,};
+        struct rda_local *fop_local   = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, statpost,
+                                            &synced_stat,
+                                            fop_local->generation);
+
+                /* no usable ctime: hand back a fully zeroed stat */
+                if (!synced_stat.ia_ctime)
+                        memset (&synced_stat, 0, sizeof (synced_stat));
+        }
+
+        RDA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+                          &synced_stat, xdata);
+        return 0;
+}
+
+/*
+ * setattr fop: winds to the first child through RDA_COMMON_MODIFICATION_FOP,
+ * tracking loc->inode so that rda_setattr_cbk can refresh the stat cached
+ * for that inode. Always returns 0.
+ */
+static int32_t
+rda_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (setattr, frame, this, loc->inode, xdata,
+ loc, stbuf, valid);
+ return 0;
+}
+
+/*
+ * fsetattr callback: on success, fold @statpost into the stat cached on the
+ * inode and unwind with the resulting stat; on failure unwind with a zeroed
+ * post-op stat.
+ */
+static int32_t
+rda_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+                  struct iatt *statpost, dict_t *xdata)
+{
+        struct iatt       synced_stat = {0,};
+        struct rda_local *fop_local   = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, statpost,
+                                            &synced_stat,
+                                            fop_local->generation);
+
+                /* no usable ctime: hand back a fully zeroed stat */
+                if (!synced_stat.ia_ctime)
+                        memset (&synced_stat, 0, sizeof (synced_stat));
+        }
+
+        RDA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre,
+                          &synced_stat, xdata);
+        return 0;
+}
+
+/*
+ * fsetattr fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking fd->inode so that rda_fsetattr_cbk
+ * can refresh the stat cached for that inode. Always returns 0.
+ */
+static int32_t
+rda_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (fsetattr, frame, this, fd->inode, xdata,
+ fd, stbuf, valid);
+ return 0;
+}
+
+/*
+ * removexattr callback: the response carries no stat, so on success the
+ * stat cached on the inode is invalidated (NULL stat passed to the updater)
+ * before unwinding.
+ */
+static int32_t
+rda_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        struct rda_local *fop_local = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, NULL, NULL,
+                                            fop_local->generation);
+        }
+
+        RDA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * removexattr fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking loc->inode so that
+ * rda_removexattr_cbk can invalidate the stat cached for that inode.
+ * Always returns 0.
+ */
+static int32_t
+rda_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (removexattr, frame, this, loc->inode,
+ xdata, loc, name);
+ return 0;
+}
+
+/*
+ * fremovexattr callback: the response carries no stat, so on success the
+ * stat cached on the inode is invalidated (NULL stat passed to the updater)
+ * before unwinding.
+ */
+static int32_t
+rda_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        struct rda_local *fop_local = NULL;
+
+        if (op_ret >= 0) {
+                fop_local = frame->local;
+                rda_inode_ctx_update_iatts (fop_local->inode, this, NULL, NULL,
+                                            fop_local->generation);
+        }
+
+        RDA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+        return 0;
+}
+
+/*
+ * fremovexattr fop: winds to the first child through
+ * RDA_COMMON_MODIFICATION_FOP, tracking fd->inode so that
+ * rda_fremovexattr_cbk can invalidate the stat cached for that inode.
+ * Always returns 0.
+ */
+static int32_t
+rda_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP (fremovexattr, frame, this, fd->inode,
+ xdata, fd, name);
+ return 0;
+}
+
+static int32_t
rda_releasedir(xlator_t *this, fd_t *fd)
{
uint64_t val;
@@ -566,6 +1070,23 @@ rda_releasedir(xlator_t *this, fd_t *fd)
return 0;
}
+/*
+ * forget callback: detach this xlator's context from @inode and free it.
+ * Nothing is freed when no context value was stored. Always returns 0.
+ */
+static int
+rda_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t         ctx_uint = 0;
+        rda_inode_ctx_t *ctx      = NULL;
+
+        inode_ctx_del1 (inode, this, &ctx_uint);
+        if (!ctx_uint)
+                return 0;
+
+        /* convert through a pointer-sized integer so the cast is
+         * well-defined where pointers are narrower than 64 bits */
+        ctx = (rda_inode_ctx_t *)(unsigned long)ctx_uint;
+
+        GF_FREE (ctx);
+
+        return 0;
+}
+
int32_t
mem_acct_init(xlator_t *this)
{
@@ -677,10 +1198,28 @@ out:
struct xlator_fops fops = {
.opendir = rda_opendir,
.readdirp = rda_readdirp,
+ /* inode write */
+ /* TODO: invalidate a dentry's stats if its pointing to a directory
+ * when entry operations happen in that directory
+ */
+ .writev = rda_writev,
+ .truncate = rda_truncate,
+ .ftruncate = rda_ftruncate,
+ .fallocate = rda_fallocate,
+ .discard = rda_discard,
+ .zerofill = rda_zerofill,
+ /* metadata write */
+ .setxattr = rda_setxattr,
+ .fsetxattr = rda_fsetxattr,
+ .setattr = rda_setattr,
+ .fsetattr = rda_fsetattr,
+ .removexattr = rda_removexattr,
+ .fremovexattr = rda_fremovexattr,
};
struct xlator_cbks cbks = {
.releasedir = rda_releasedir,
+ .forget = rda_forget,
};
struct volume_options options[] = {
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
index 3c06cc0f107..7dbed4c60ed 100644
--- a/xlators/performance/readdir-ahead/src/readdir-ahead.h
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
@@ -19,6 +19,44 @@
#define RDA_FD_BYPASS (1 << 4)
#define RDA_FD_PLUGGED (1 << 5)
+
+/*
+ * Common body of every fop that modifies an inode.
+ *
+ * Allocates a struct rda_local, takes a ref on __inode, snapshots the
+ * generation of the inode's readdir-ahead context, refs __xdata (if any)
+ * and winds fop `name` to the first child with rda_<name>_cbk as callback.
+ *
+ * Failure handling:
+ *  - if the local cannot be allocated, the fop is passed straight through
+ *    with STACK_WIND_TAIL (no callback in this xlator), so the stat cached
+ *    for the inode is not refreshed for that one operation;
+ *  - if the inode context cannot be obtained, the generation stays 0, the
+ *    value mem_get0() zero-filled it with.
+ */
+#define RDA_COMMON_MODIFICATION_FOP(name, frame, this, __inode, __xdata, args ...)\
+        do {                                                                   \
+                struct rda_local *__local = NULL;                              \
+                rda_inode_ctx_t  *ctx_p   = NULL;                              \
+                                                                               \
+                __local = mem_get0 ((this)->local_pool);                       \
+                if (!__local) {                                                \
+                        STACK_WIND_TAIL (frame, FIRST_CHILD(this),             \
+                                         FIRST_CHILD(this)->fops->name,        \
+                                         args, __xdata);                       \
+                        break;                                                 \
+                }                                                              \
+                                                                               \
+                __local->inode = inode_ref (__inode);                          \
+                LOCK (&(__inode)->lock);                                       \
+                {                                                              \
+                        ctx_p = __rda_inode_ctx_get (__inode, this);           \
+                }                                                              \
+                UNLOCK (&(__inode)->lock);                                     \
+                if (ctx_p)                                                     \
+                        __local->generation =                                  \
+                                GF_ATOMIC_GET (ctx_p->generation);             \
+                                                                               \
+                frame->local = __local;                                        \
+                if (__xdata)                                                   \
+                        __local->xattrs = dict_ref (__xdata);                  \
+                                                                               \
+                STACK_WIND (frame, rda_##name##_cbk, FIRST_CHILD(this),        \
+                            FIRST_CHILD(this)->fops->name, args, __xdata);     \
+        } while (0)
+
+
+/*
+ * Unwind `fop` and then release the frame's rda_local.
+ *
+ * frame->local is detached before STACK_UNWIND_STRICT; afterwards, if a
+ * local was attached, its references are dropped with rda_local_wipe() and
+ * it is returned with mem_put().
+ */
+#define RDA_STACK_UNWIND(fop, frame, params ...) do { \
+ struct rda_local *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ rda_local_wipe (__local); \
+ mem_put (__local); \
+ } \
+} while (0)
+
+
struct rda_fd_ctx {
off_t cur_offset; /* current head of the ctx */
size_t cur_size; /* current size of the preload */
@@ -34,9 +72,12 @@ struct rda_fd_ctx {
 struct rda_local {
 struct rda_fd_ctx *ctx;
- fd_t *fd;
- off_t offset;
- dict_t *xattrs; /* xattrs to be sent in readdirp() */
+ fd_t *fd; /* unref'd by rda_local_wipe() when set */
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ inode_t *inode; /* inode a modification fop acts on; unref'd by rda_local_wipe() */
+ off_t offset;
+ uint64_t generation; /* inode-ctx generation sampled when the fop was wound */
+ int32_t skip_dir; /* NOTE(review): not referenced in the visible code - confirm use */
 };
struct rda_priv {
@@ -48,4 +89,9 @@ struct rda_priv {
gf_boolean_t parallel_readdir;
};
+/* Per-inode context kept by readdir-ahead. */
+typedef struct rda_inode_ctx {
+ struct iatt statbuf; /* cached stat; ia_ctime == 0 is treated as invalidated */
+ gf_atomic_t generation; /* incremented each time statbuf is invalidated */
+} rda_inode_ctx_t;
+
#endif /* __READDIR_AHEAD_H */