summaryrefslogtreecommitdiffstats
path: root/tests/basic/gfapi/upcall-cache-invalidate.c
blob: cc2e6a0fe33c9e7894ce259b91947e80aa36a31b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <limits.h>
#include <alloca.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <glusterfs/api/glfs.h>
#include <glusterfs/api/glfs-handles.h>
int gfapi = 1;

/*
 * LOG_ERR(func, ret): report the outcome of a call on stderr.
 *
 *   func - string naming the call; used as the message prefix.
 *   ret  - integer result of the call; 0 is treated as success.
 *
 * On a non-zero result the message also carries strerror(errno) and control
 * jumps to the enclosing function's `out` label, so the macro can only be
 * used inside functions that define that label.  Both arguments are
 * evaluated exactly once and copied into locals, which makes it safe to pass
 * arbitrary expressions.
 */
#define LOG_ERR(func, ret) do { \
        const char *log_err_func_ = (func); \
        int         log_err_ret_  = (ret); \
        if (log_err_ret_ != 0) { \
                fprintf (stderr, "%s : returned error %d (%s)\n", \
                         log_err_func_, log_err_ret_, strerror (errno)); \
                goto out; \
        } else { \
                fprintf (stderr, "%s : returned %d\n", \
                         log_err_func_, log_err_ret_); \
        } \
        } while (0)

/*
 * Exercise upcall cache invalidation between two gfapi clients.
 *
 * Usage: <prog> <volname> <logfile>
 *
 * Two glfs instances are initialised for the volume (volfile server
 * localhost:24007) and each opens the same file.  For cnt = 1..4 the two
 * instances swap roles every round: one writes "abcd" through its fd, the
 * other reads the data back through its own fd.  From the third round on,
 * the instance that wrote polls for an upcall and the round fails unless a
 * GFAPI_INODE_INVALIDATE event is delivered.
 *
 * Exit status is 0 only if every step, including closing the fds and
 * glfs_fini() on both instances, succeeded.
 */
int
main (int argc, char *argv[])
{
        glfs_t    *fs = NULL;
        glfs_t    *fs2 = NULL;
        glfs_t    *fs_tmp = NULL;
        int        ret = 0;
        int        cleanup_ret = 0;
        glfs_fd_t *fd = NULL;
        glfs_fd_t *fd2 = NULL;
        glfs_fd_t *fd_tmp = NULL;
        glfs_fd_t *fd_tmp2 = NULL;
        char       readbuf[32];
        char      *filename = "file_tmp";
        const char writebuf[] = "abcd";
        unsigned int       cnt = 1;
        struct    callback_arg cbk;
        char      *logfile = NULL;
        char      *volname = NULL;
        struct callback_inode_arg *in_arg = NULL;

        if (argc != 3) {
                fprintf (stderr, "Invalid argument\n");
                exit(1);
        }

        volname = argv[1];
        logfile = argv[2];

        fs = glfs_new (volname);
        if (!fs) {
                fprintf (stderr, "glfs_new: returned NULL\n");
                return -1;
        }

        ret = glfs_set_volfile_server (fs, "tcp", "localhost", 24007);
        LOG_ERR("glfs_set_volfile_server", ret);

        ret = glfs_set_logging (fs, logfile, 7);
        LOG_ERR("glfs_set_logging", ret);

        ret = glfs_init (fs);
        LOG_ERR("glfs_init", ret);

        fs2 = glfs_new (volname);
        if (!fs2) {
                fprintf (stderr, "glfs_new fs2: returned NULL\n");
                /* fs is already set up: leave through 'out' so it is torn
                 * down instead of being leaked. */
                ret = -1;
                goto out;
        }

        ret = glfs_set_volfile_server (fs2, "tcp", "localhost", 24007);
        LOG_ERR("glfs_set_volfile_server-fs2", ret);

        ret = glfs_set_logging (fs2, logfile, 7);
        LOG_ERR("glfs_set_logging-fs2", ret);

        ret = glfs_init (fs2);
        LOG_ERR("glfs_init-fs2", ret);

        fd = glfs_creat(fs, filename, O_RDWR|O_SYNC, 0644);
        if (!fd) {
                ret = -1;
                LOG_ERR ("glfs_creat", ret);
        }
        /* fd is a pointer, so it has to be printed with %p */
        fprintf (stderr, "glfs-create fd - %p\n", (void *) fd);

        fd2 = glfs_open(fs2, filename, O_SYNC|O_RDWR|O_CREAT);
        if (!fd2) {
                ret = -1;
                LOG_ERR ("glfs_open-fs2", ret);
        }
        fprintf (stderr, "glfs-open fd2 - %p\n", (void *) fd2);

        do {
                /* Odd rounds: fs/fd writes and fd2 reads; even rounds the
                 * roles are swapped. */
                if (cnt%2) {
                        fd_tmp = fd;
                        fs_tmp = fs;
                        fd_tmp2 = fd2;
                } else {
                        fd_tmp = fd2;
                        fs_tmp = fs2;
                        fd_tmp2 = fd;
                }

                /* WRITE on fd_tmp */
                ret = glfs_write (fd_tmp, writebuf, 4, 0);
                if (ret <= 0)   {
                        ret = -1;
                        LOG_ERR ("glfs_write", ret);
                } else {
                        fprintf (stderr,
                                 "glfs_write suceeded\n");
                }

                /* READ on fd_tmp2 */
                ret = glfs_lseek (fd_tmp2, 0, SEEK_SET);
                LOG_ERR ("glfs_lseek", ret);

                ret = glfs_pread (fd_tmp2, readbuf, 4, 0, 0);

                if (ret <= 0) {
                        ret = -1;
                        LOG_ERR ("glfs_pread", ret);
                } else {
                        /* At most 4 bytes were requested, so ret is a valid
                         * index; terminate before printing with %s. */
                        readbuf[ret] = '\0';
                        fprintf (stderr, "glfs_read: %s\n", readbuf);
                }

                /* Open() fops seem to be not performed on server side until
                 * there are I/Os on that fd
                 */
                if (cnt > 2) {
                        /* Clear the previous round's result so that a poll
                         * delivering nothing cannot be mistaken for a new
                         * event (and the freed event_arg is never reused). */
                        cbk.reason = 0;
                        cbk.event_arg = NULL;

                        ret = glfs_h_poll_upcall(fs_tmp, &cbk);
                        LOG_ERR ("glfs_h_poll_upcall", ret);
                        /* Expect 'GFAPI_INODE_INVALIDATE' upcall event. */
                        if (cbk.reason == GFAPI_INODE_INVALIDATE &&
                            cbk.event_arg != NULL) {
                                in_arg = cbk.event_arg;
                                fprintf (stderr, " upcall event type - %d,"
                                         " object(%p), flags(%d), "
                                         " expire_time_attr(%d)\n" ,
                                         cbk.reason, (void *) in_arg->object,
                                         in_arg->flags,
                                         in_arg->expire_time_attr);
                                ret = glfs_h_close (in_arg->object);
                                /* Release the event before the error check
                                 * so a failed close does not leak it. */
                                free (in_arg);
                                in_arg = NULL;
                                LOG_ERR ("glfs_h_close", ret);
                        } else {
                                fprintf (stderr,
                                         "Dint receive upcall notify event");
                                ret = -1;
                                goto err;
                        }
                }

                sleep(5);
        } while (++cnt < 5);

        /* Every failure above jumps away, so reaching this point means all
         * rounds passed. */
        ret = 0;

err:
        /* Close both fds regardless of how we got here; a close failure is
         * recorded only if no earlier error is already pending. */
        cleanup_ret = glfs_close(fd);
        fprintf (stderr, "glfs_close : returned %d\n", cleanup_ret);
        if (cleanup_ret != 0 && ret == 0)
                ret = cleanup_ret;

        cleanup_ret = glfs_close(fd2);
        fprintf (stderr, "glfs_close-fd2 : returned %d\n", cleanup_ret);
        if (cleanup_ret != 0 && ret == 0)
                ret = cleanup_ret;

out:
        /* Teardown results are kept in cleanup_ret so that they cannot
         * overwrite an error that brought us here. */
        if (fs) {
                cleanup_ret = glfs_fini(fs);
                fprintf (stderr, "glfs_fini(fs) returned %d \n", cleanup_ret);
                if (cleanup_ret != 0 && ret == 0)
                        ret = cleanup_ret;
        }

        if (fs2) {
                cleanup_ret = glfs_fini(fs2);
                fprintf (stderr, "glfs_fini(fs2) returned %d \n", cleanup_ret);
                if (cleanup_ret != 0 && ret == 0)
                        ret = cleanup_ret;
        }

        if (ret)
                exit(1);
        exit(0);
}


v>
/**
- * incremented once per open()/open().../close()/close() sequence
+ * incremented once per write()/write().../close()/close() sequence
*/
- startversion++;
ret = brstub_validate_version (bpath, startversion);
if (ret < 0)
goto err;
@@ -106,11 +135,11 @@ brstub_new_object_validate (char *filp, char *brick)
printf ("Validating initial version..\n");
ret = brstub_validate_version (bpath, 1);
- if (ret < 0)
+ if (ret == 0)
goto err;
printf ("Validating version on modifications..\n");
- ret = brstub_open_validation (filp, bpath, 1);
+ ret = brstub_write_validation (filp, bpath, 1);
if (ret < 0)
goto err;
diff --git a/tests/bitrot/br-stub.t b/tests/bitrot/br-stub.t
index 11d02418785..bab4c7cdbd1 100644
--- a/tests/bitrot/br-stub.t
+++ b/tests/bitrot/br-stub.t
@@ -2,6 +2,7 @@
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
STUB_SOURCE=$(dirname $0)/br-stub.c
STUB_EXEC=$(dirname $0)/br-stub
@@ -17,9 +18,9 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
EXPECT '2' brick_count $V0
-## Turn off open-behind (stub does not work with anonfd yet..)
-TEST $CLI volume set $V0 performance.open-behind off
-EXPECT 'off' volinfo_field $V0 'performance.open-behind'
+## Turn off write-behind (write-behind clubs writes together)
+TEST $CLI volume set $V0 performance.write-behind off
+#EXPECT 'off' volinfo_field $V0 'performance.open-behind'
## Start the volume
TEST $CLI volume start $V0;
@@ -27,6 +28,7 @@ EXPECT 'Started' volinfo_field $V0 'Status';
## Mount the volume
TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
## Build stub C source
build_tester $STUB_SOURCE -o $STUB_EXEC -I$(dirname $0)/../../xlators/features/bit-rot/src/stub
@@ -34,11 +36,20 @@ TEST [ -e $STUB_EXEC ]
## create & check version
fname="$M0/filezero"
-touch $fname
+touch $fname;
backpath=$(get_backend_paths $fname)
+
+TEST $STUB_EXEC $fname $(dirname $backpath)
+
+rm -f $fname;
+
+## test nfs
+fname="$N0/filezero"
+touch $fname; # backpath remains same..
+
TEST $STUB_EXEC $fname $(dirname $backpath)
-## cleanups..
+##cleanups..
rm -f $STUB_EXEC
cleanup;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 0bb2f2ab8e6..b9adbd6647c 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -171,11 +171,11 @@ bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd)
if (fd)
ret = syncop_fgetxattr (child->xl, fd, &xattr,
- BITROT_OBJECT_BAD_KEY, NULL,
+ "trusted.glusterfs.bad-file", NULL,
NULL);
else if (loc)
ret = syncop_getxattr (child->xl, loc, &xattr,
- BITROT_OBJECT_BAD_KEY, NULL,
+ "trusted.glusterfs.bad-file", NULL,
NULL);
if (!ret) {
@@ -484,6 +484,98 @@ br_log_object_path (xlator_t *this, char *op,
op, path, strerror (op_errno));
}
+static void
+br_send_dummy_write (xlator_t *this, fd_t *fd, br_child_t *child,
+ dict_t *xdata)
+{
+ struct iovec iov = {0, };
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ char *msg = NULL;
+ size_t size = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, child, out);
+
+ msg = gf_strdup ("GLUSTERFS");
+ if (!msg)
+ goto out;
+
+ size = strlen (msg);
+
+ iov.iov_base = msg;
+ iov.iov_len = size;
+
+ iobref = iobref_new ();
+ if (!iobref)
+ goto free_msg;
+
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+ if (!iobuf)
+ goto free_iobref;
+
+ iobref_add (iobref, iobuf);
+
+ iov_unload (iobuf_ptr (iobuf), &iov, 1); /* FIXME!!! */
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = size;
+
+ ret = syncop_writev (child->xl, fd, &iov, 1, 0, iobref, 0, xdata, NULL);
+ if (ret <= 0) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "dummy write failed (%s)", strerror (errno));
+ goto free_iobuf;
+ }
+
+ /* iobref_unref() takes care of iobuf unref */
+ ret = 0;
+
+ free_iobuf:
+ iobuf_unref (iobuf);
+ free_iobref:
+ iobref_unref (iobref);
+ free_msg:
+ GF_FREE (msg);
+ out:
+ return;
+}
+
+static void
+br_object_handle_reopen (xlator_t *this,
+ br_object_t *object, inode_t *linked_inode)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ loc_t loc = {0, };
+
+ /**
+ * Here dict is purposefully not checked for NULL, because at any cost
+ * sending a re-open should not be missed. This re-open is an indication
+ * for the stub to properly mark inode's status.
+ */
+ dict = dict_new ();
+ if (dict) {
+ /* TODO: Make it a #define */
+ ret = dict_set_int32 (dict, "br-fd-reopen", 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Object reopen would trigger versioning.");
+ }
+
+ loc.inode = inode_ref (linked_inode);
+ gf_uuid_copy (loc.gfid, linked_inode->gfid);
+
+ br_trigger_sign (this, object->child, linked_inode, &loc, dict);
+
+ if (dict)
+ dict_unref (dict);
+ loc_wipe (&loc);
+}
+
/**
* Sign a given object. This routine runs full throttle. There needs to be
* some form of priority scheduling and/or read burstness to avoid starving
@@ -497,6 +589,7 @@ static inline int32_t br_sign_object (br_object_t *object)
fd_t *fd = NULL;
struct iatt iatt = {0, };
pid_t pid = GF_CLIENT_PID_BITD;
+ br_sign_state_t sign_info = BR_SIGN_NORMAL;
GF_VALIDATE_OR_GOTO ("bit-rot", object, out);
@@ -515,6 +608,20 @@ static inline int32_t br_sign_object (br_object_t *object)
goto out;
}
+ /* sanity check */
+ sign_info = ntohl (object->sign_info);
+ GF_ASSERT (sign_info != BR_SIGN_NORMAL);
+
+ /**
+ * For fd's that have notified for reopening, we send an explicit
+ * open() followed by a dummy write() call. This triggers the
+ * actual signing of the object.
+ */
+ if (sign_info == BR_SIGN_REOPEN_WAIT) {
+ br_object_handle_reopen (this, object, linked_inode);
+ goto unref_inode;
+ }
+
ret = br_object_open (this, object, linked_inode, &fd);
if (!fd) {
br_log_object (this, "open", object->gfid, -ret);
@@ -648,6 +755,7 @@ br_initialize_object (xlator_t *this, br_child_t *child, changelog_event_t *ev)
/* NOTE: it's BE, but no worry */
object->signedversion = ev->u.releasebr.version;
+ object->sign_info = ev->u.releasebr.sign_info;
out:
return object;
@@ -693,7 +801,6 @@ br_brick_callback (void *xl, char *brick,
xlator_t *this = NULL;
br_object_t *object = NULL;
br_child_t *child = NULL;
- int32_t flags = 0;
struct gf_tw_timer_list *timer = NULL;
this = xl;
@@ -710,14 +817,6 @@ br_brick_callback (void *xl, char *brick,
gf_log (this->name, GF_LOG_DEBUG,
"RELEASE EVENT [GFID %s]", uuid_utoa (gfid));
- flags = (int32_t)ntohl (ev->u.releasebr.flags);
- if (flags == O_RDONLY) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Read only fd [GFID: %s], ignoring signing..",
- uuid_utoa (gfid));
- goto out;
- }
-
child = br_get_child_from_brick_path (this, brick);
if (!child) {
gf_log (this->name, GF_LOG_ERROR, "failed to get the subvolume "
@@ -804,12 +903,15 @@ out:
return need_sign;
}
-static inline void
+void
br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
fd_t *fd = NULL;
int32_t ret = -1;
+ pid_t pid = GF_CLIENT_PID_BITD;
+
+ syncopctx_setfspid (&pid);
fd = fd_create (linked_inode, 0);
if (!fd) {
@@ -828,8 +930,10 @@ br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode,
fd_bind (fd);
}
- if (fd)
+ if (fd) {
+ br_send_dummy_write (this, fd, child, xdata);
syncop_close (fd);
+ }
out:
return;
@@ -972,7 +1076,7 @@ bitd_oneshot_crawl (xlator_t *subvol,
gf_log (this->name, GF_LOG_INFO,
"Triggering signing for %s [GFID: %s | Brick: %s]",
loc.path, uuid_utoa (linked_inode->gfid), child->brick_path);
- br_trigger_sign (this, child, linked_inode, &loc);
+ br_trigger_sign (this, child, linked_inode, &loc, NULL);
ret = 0;
@@ -1600,7 +1704,9 @@ struct xlator_cbks cbks;
struct volume_options options[] = {
{ .key = {"expiry-time"},
.type = GF_OPTION_TYPE_INT,
- .default_value = "120",
+ /* Let the default timer be half the value of the wait time for
+ * signing (which is 120 as of now) */
+ .default_value = "60",
.description = "default time duration for which an object waits "
"before it is signed",
},
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index d2cf3b07b4d..1705f715f0c 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -157,6 +157,8 @@ struct br_object {
be signed */
br_child_t *child; /* object's subvolume */
+ int sign_info;
+
struct list_head list; /* hook to add to the queue once the
object is expired from timer wheel */
void *data;
@@ -180,4 +182,8 @@ br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
gf_boolean_t
bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *);
+void
+br_trigger_sign (xlator_t *this, br_child_t *child, inode_t *linked_inode,
+ loc_t *loc, dict_t *xdata);
+
#endif /* __BIT_ROT_H__ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
index 699323170d3..7fd584e5970 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-common.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
@@ -33,6 +33,13 @@ typedef enum br_vxattr_state {
BR_VXATTR_STATUS_INVALID = 3,
} br_vxattr_status_t;
+typedef enum br_sign_state {
+ BR_SIGN_INVALID = -1,
+ BR_SIGN_NORMAL = 0,
+ BR_SIGN_REOPEN_WAIT = 1,
+ BR_SIGN_QUICK = 2,
+} br_sign_state_t;
+
static inline br_vxattr_status_t
br_version_xattr_state (dict_t *xattr,
br_version_t **obuf, br_signature_t **sbuf)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index 46271407219..9f6da89032f 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -28,7 +28,8 @@ enum br_mem_types {
gf_br_mt_br_tbf_opspec_t,
gf_br_mt_br_scrubber_t,
gf_br_mt_br_fsscan_entry_t,
- gf_br_stub_mt_end
+ gf_br_stub_mt_br_stub_fd_t,
+ gf_br_stub_mt_end,
};
#endif
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index f9c3886948a..93db072f671 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -198,14 +198,15 @@ br_stub_init_inode_versions (xlator_t *this, fd_t *fd, inode_t *inode,
if (!ctx)
goto error_return;
+ INIT_LIST_HEAD (&ctx->fd_list);
(markdirty) ? __br_stub_mark_inode_dirty (ctx)
: __br_stub_mark_inode_synced (ctx);
__br_stub_set_ongoing_version (ctx, version);
- __br_stub_reset_release_counters (ctx);
if (fd) {
- br_stub_require_release_call (this, fd);
- __br_stub_track_openfd (fd, ctx);
+ ret = br_stub_add_fd_to_inode (this, fd, ctx);
+ if (ret)
+ goto free_ctx;
}
ret = br_stub_set_inode_ctx (this, inode, ctx);
if (ret)
@@ -238,7 +239,6 @@ br_stub_mod_inode_versions (xlator_t *this,
__br_stub_mark_inode_synced (ctx);
}
- __br_stub_track_openfd (fd, ctx);
ret = 0;
}
unblock:
@@ -250,19 +250,16 @@ br_stub_mod_inode_versions (xlator_t *this,
static inline void
br_stub_fill_local (br_stub_local_t *local,
call_stub_t *stub, fd_t *fd, inode_t *inode, uuid_t gfid,
- int versioningtype, unsigned long memversion, int dirty)
+ int versioningtype, unsigned long memversion)
{
local->fopstub = stub;
local->versioningtype = versioningtype;
local->u.context.version = memversion;
- if (fd)
+ if (fd && !local->u.context.fd)
local->u.context.fd = fd_ref (fd);
if (inode)
local->u.context.inode = inode_ref (inode);
gf_uuid_copy (local->u.context.gfid, gfid);
-
- /* mark inode dirty/fresh according to durability */
- local->u.context.markdirty = (dirty) ? _gf_true : _gf_false;
}
static inline void
@@ -279,57 +276,13 @@ br_stub_cleanup_local (br_stub_local_t *local)
inode_unref (local->u.context.inode);
local->u.context.inode = NULL;
}
- local->u.context.markdirty = _gf_true;
memset (local->u.context.gfid, '\0', sizeof (uuid_t));
}
/**
- * callback for inode/fd full versioning
+ * callback for inode/fd versioning
*/
int
-br_stub_inode_fullversioning_cbk (call_frame_t *frame,
- void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- fd_t *fd = NULL;
- inode_t *inode = NULL;
- unsigned long version = 0;
- gf_boolean_t dirty = _gf_true;
- br_stub_local_t *local = NULL;
-
- local = (br_stub_local_t *)frame->local;
-
- /* be graceful to EEXIST */
- if ((op_ret < 0) && (op_errno == EEXIST)) {
- op_ret = 0;
- goto done;