From d405498e377754e5465c5c8e849b9df4deb65c49 Mon Sep 17 00:00:00 2001 From: Xavi Hernandez Date: Tue, 12 May 2020 23:54:54 +0200 Subject: open-behind: rewrite of internal logic There was a critical flaw in the previous implementation of open-behind. When an open is done in the background, it's necessary to take a reference on the fd_t object because once we "fake" the open answer, the fd could be destroyed. However as long as there's a reference, the release function won't be called. So, if the application closes the file descriptor without having actually opened it, there will always remain at least 1 reference, causing a leak. To avoid this problem, the previous implementation didn't take a reference on the fd_t, so there were races where the fd could be destroyed while it was still in use. To fix this, I've implemented a new xlator cbk that gets called from fuse when the application closes a file descriptor. The whole logic of handling background opens have been simplified and it's more efficient now. Only if the fop needs to be delayed until an open completes, a stub is created. Otherwise no memory allocations are needed. Correctly handling the close request while the open is still pending has added a bit of complexity, but overall normal operation is simpler. Change-Id: I6376a5491368e0e1c283cc452849032636261592 Fixes: #1225 Signed-off-by: Xavi Hernandez --- libglusterfs/src/fd.c | 26 + libglusterfs/src/glusterfs/fd.h | 3 + libglusterfs/src/glusterfs/xlator.h | 4 + libglusterfs/src/libglusterfs.sym | 1 + tests/basic/open-behind/open-behind.t | 183 +++ tests/basic/open-behind/tester-fd.c | 99 ++ tests/basic/open-behind/tester.c | 444 +++++++ tests/basic/open-behind/tester.h | 145 +++ tests/bugs/glusterfs/bug-873962-spb.t | 1 + xlators/mount/fuse/src/fuse-bridge.c | 2 + .../open-behind/src/open-behind-messages.h | 6 +- xlators/performance/open-behind/src/open-behind.c | 1302 ++++++++------------ 12 files changed, 1393 insertions(+), 823 deletions(-) create mode 100644 tests/basic/open-behind/open-behind.t create mode 100644 tests/basic/open-behind/tester-fd.c create mode 100644 tests/basic/open-behind/tester.c create mode 100644 tests/basic/open-behind/tester.h diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c index e0767a7e61f..62606e91164 100644 --- a/libglusterfs/src/fd.c +++ b/libglusterfs/src/fd.c @@ -501,6 +501,32 @@ out: return; } +void +fd_close(fd_t *fd) +{ + xlator_t *xl, *old_THIS; + + old_THIS = THIS; + + for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) { + if (!xl->call_cleanup) { + THIS = xl; + + if (IA_ISDIR(fd->inode->ia_type)) { + if (xl->cbks->fdclosedir != NULL) { + xl->cbks->fdclosedir(xl, fd); + } + } else { + if (xl->cbks->fdclose != NULL) { + xl->cbks->fdclose(xl, fd); + } + } + } + } + + THIS = old_THIS; +} + void fd_unref(fd_t *fd) { diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h index 28906d34e4d..3ffaaa60504 100644 --- a/libglusterfs/src/glusterfs/fd.h +++ b/libglusterfs/src/glusterfs/fd.h @@ -106,6 +106,9 @@ fd_ref(fd_t *fd); void fd_unref(fd_t *fd); +void +fd_close(fd_t *fd); + fd_t * fd_create(struct _inode *inode, pid_t pid); diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h index c94bd75cf1c..23004ab2245 100644 --- a/libglusterfs/src/glusterfs/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -702,6 +702,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); +typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd); + struct xlator_cbks { cbk_forget_t forget; cbk_release_t release; @@ -712,6 +714,8 @@ struct xlator_cbks { cbk_ictxmerge_t ictxmerge; cbk_inodectx_size_t ictxsize; cbk_fdctx_size_t fdctxsize; + cbk_fdclose_t fdclose; + cbk_fdclose_t fdclosedir; }; typedef int32_t (*dumpop_priv_t)(xlator_t *this); diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index f5e273e4ca0..c80b892df25 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -451,6 +451,7 @@ gf_event_unregister_close fd_anonymous fd_anonymous_with_flags fd_bind +fd_close fd_create fd_create_uint64 __fd_ctx_del diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t new file mode 100644 index 00000000000..5e865d602e2 --- /dev/null +++ b/tests/basic/open-behind/open-behind.t @@ -0,0 +1,183 @@ +#!/bin/bash + +WD="$(dirname "${0}")" + +. ${WD}/../../include.rc +. ${WD}/../../volume.rc + +function assign() { + local _assign_var="${1}" + local _assign_value="${2}" + + printf -v "${_assign_var}" "%s" "${_assign_value}" +} + +function pipe_create() { + local _pipe_create_var="${1}" + local _pipe_create_name + local _pipe_create_fd + + _pipe_create_name="$(mktemp -u)" + mkfifo "${_pipe_create_name}" + exec {_pipe_create_fd}<>"${_pipe_create_name}" + rm "${_pipe_create_name}" + + assign "${_pipe_create_var}" "${_pipe_create_fd}" +} + +function pipe_close() { + local _pipe_close_fd="${!1}" + + exec {_pipe_close_fd}>&- +} + +function tester_start() { + declare -ag tester + local tester_in + local tester_out + + pipe_create tester_in + pipe_create tester_out + + ${WD}/tester <&${tester_in} >&${tester_out} & + + tester=("$!" "${tester_in}" "${tester_out}") +} + +function tester_send() { + declare -ag tester + local tester_res + local tester_extra + + echo "${*}" >&${tester[1]} + + read -t 3 -u ${tester[2]} tester_res tester_extra + echo "${tester_res} ${tester_extra}" + if [[ "${tester_res}" == "OK" ]]; then + return 0 + fi + + return 1 +} + +function tester_stop() { + declare -ag tester + local tester_res + + tester_send "quit" + + tester_res=0 + if ! wait ${tester[0]}; then + tester_res=$? + fi + + unset tester + + return ${tester_res} +} + +function count_open() { + local file="$(realpath "${B0}/${V0}/${1}")" + local count="0" + local inode + local ref + + inode="$(stat -c %i "${file}")" + + for fd in /proc/${BRICK_PID}/fd/*; do + ref="$(readlink "${fd}")" + if [[ "${ref}" == "${B0}/${V0}/"* ]]; then + if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then + count="$((${count} + 1))" + fi + fi + done + + echo "${count}" +} + +cleanup + +TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c + +TEST glusterd +TEST pidof glusterd +TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0} +TEST ${CLI} volume set ${V0} flush-behind off +TEST ${CLI} volume set ${V0} write-behind off +TEST ${CLI} volume set ${V0} quick-read off +TEST ${CLI} volume set ${V0} stat-prefetch on +TEST ${CLI} volume set ${V0} io-cache off +TEST ${CLI} volume set ${V0} open-behind on +TEST ${CLI} volume set ${V0} lazy-open off +TEST ${CLI} volume set ${V0} read-after-open off +TEST ${CLI} volume start ${V0} + +TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; + +BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})" + +TEST touch "${M0}/test" + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; + +TEST tester_start + +TEST tester_send fd open 0 "${M0}/test" +EXPECT_WITHIN 5 "1" count_open "/test" +TEST tester_send fd close 0 +EXPECT_WITHIN 5 "0" count_open "/test" + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST ${CLI} volume set ${V0} lazy-open on +TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; + +TEST tester_send fd open 0 "${M0}/test" +sleep 2 +EXPECT "0" count_open "/test" +TEST tester_send fd write 0 "test" +EXPECT "1" count_open "/test" +TEST tester_send fd close 0 +EXPECT_WITHIN 5 "0" count_open "/test" + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; + +TEST tester_send fd open 0 "${M0}/test" +EXPECT "0" count_open "/test" +EXPECT "test" tester_send fd read 0 64 +# Even though read-after-open is disabled, use-anonymous-fd is also disabled, +# so reads need to open the file first. +EXPECT "1" count_open "/test" +TEST tester_send fd close 0 +EXPECT "0" count_open "/test" + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; + +TEST tester_send fd open 0 "${M0}/test" +EXPECT "0" count_open "/test" +TEST tester_send fd open 1 "${M0}/test" +EXPECT "2" count_open "/test" +TEST tester_send fd close 0 +EXPECT_WITHIN 5 "1" count_open "/test" +TEST tester_send fd close 1 +EXPECT_WITHIN 5 "0" count_open "/test" + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST ${CLI} volume set ${V0} read-after-open on +TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; + +TEST tester_send fd open 0 "${M0}/test" +EXPECT "0" count_open "/test" +EXPECT "test" tester_send fd read 0 64 +EXPECT "1" count_open "/test" +TEST tester_send fd close 0 +EXPECT_WITHIN 5 "0" count_open "/test" + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +TEST tester_stop + +cleanup diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c new file mode 100644 index 00000000000..00f02bc5b0a --- /dev/null +++ b/tests/basic/open-behind/tester-fd.c @@ -0,0 +1,99 @@ +/* + Copyright (c) 2020 Red Hat, Inc. + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "tester.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +static int32_t +fd_open(context_t *ctx, command_t *cmd) +{ + obj_t *obj; + int32_t fd; + + obj = cmd->args[0].obj.ref; + + fd = open(cmd->args[1].str.data, O_RDWR); + if (fd < 0) { + return error(errno, "open() failed"); + } + + obj->type = OBJ_TYPE_FD; + obj->fd = fd; + + out_ok("%d", fd); + + return 0; +} + +static int32_t +fd_close(context_t *ctx, command_t *cmd) +{ + obj_t *obj; + + obj = cmd->args[0].obj.ref; + obj->type = OBJ_TYPE_NONE; + + if (close(obj->fd) != 0) { + return error(errno, "close() failed"); + } + + out_ok(); + + return 0; +} + +static int32_t +fd_write(context_t *ctx, command_t *cmd) +{ + ssize_t len, ret; + + len = strlen(cmd->args[1].str.data); + ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len); + if (ret < 0) { + return error(errno, "write() failed"); + } + + out_ok("%zd", ret); + + return 0; +} + +static int32_t +fd_read(context_t *ctx, command_t *cmd) +{ + char data[cmd->args[1].num.value + 1]; + ssize_t ret; + + ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value); + if (ret < 0) { + return error(errno, "read() failed"); + } + + data[ret] = 0; + + out_ok("%zd %s", ret, data); + + return 0; +} + +command_t fd_commands[] = { + {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))}, + {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))}, + {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))}, + {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))}, + CMD_END}; diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c new file mode 100644 index 00000000000..b2da71c8385 --- /dev/null +++ b/tests/basic/open-behind/tester.c @@ -0,0 +1,444 @@ +/* + Copyright (c) 2020 Red Hat, Inc. + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "tester.h" + +#include +#include +#include +#include +#include + +static void * +mem_alloc(size_t size) +{ + void *ptr; + + ptr = malloc(size); + if (ptr == NULL) { + error(ENOMEM, "Failed to allocate memory (%zu bytes)", size); + } + + return ptr; +} + +static void +mem_free(void *ptr) +{ + free(ptr); +} + +static bool +buffer_create(context_t *ctx, size_t size) +{ + ctx->buffer.base = mem_alloc(size); + if (ctx->buffer.base == NULL) { + return false; + } + + ctx->buffer.size = size; + ctx->buffer.len = 0; + ctx->buffer.pos = 0; + + return true; +} + +static void +buffer_destroy(context_t *ctx) +{ + mem_free(ctx->buffer.base); + ctx->buffer.size = 0; + ctx->buffer.len = 0; +} + +static int32_t +buffer_get(context_t *ctx) +{ + ssize_t len; + + if (ctx->buffer.pos >= ctx->buffer.len) { + len = read(0, ctx->buffer.base, ctx->buffer.size); + if (len < 0) { + return error(errno, "read() failed"); + } + if (len == 0) { + return 0; + } + + ctx->buffer.len = len; + ctx->buffer.pos = 0; + } + + return ctx->buffer.base[ctx->buffer.pos++]; +} + +static int32_t +str_skip_spaces(context_t *ctx, int32_t current) +{ + while ((current > 0) && (current != '\n') && isspace(current)) { + current = buffer_get(ctx); + } + + return current; +} + +static int32_t +str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current) +{ + uint32_t len; + + current = str_skip_spaces(ctx, current); + + len = 0; + while ((size > 0) && (current > 0) && (current != '\n') && + !isspace(current)) { + len++; + *buffer++ = current; + size--; + current = buffer_get(ctx); + } + + if (len == 0) { + return error(ENODATA, "Expecting a token"); + } + + if (size == 0) { + return error(ENOBUFS, "Token too long"); + } + + *buffer = 0; + + return current; +} + +static int32_t +str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value, + int32_t current) +{ + char text[32], *ptr; + uint64_t num; + + current = str_token(ctx, text, sizeof(text), current); + if (current > 0) { + num = strtoul(text, &ptr, 0); + if ((*ptr != 0) || (num < min) || (num > max)) { + return error(ERANGE, "Invalid number"); + } + *value = num; + } + + return current; +} + +static int32_t +str_eol(context_t *ctx, int32_t current) +{ + current = str_skip_spaces(ctx, current); + if (current != '\n') { + return error(EINVAL, "Expecting end of command"); + } + + return current; +} + +static void +str_skip(context_t *ctx, int32_t current) +{ + while ((current > 0) && (current != '\n')) { + current = buffer_get(ctx); + } +} + +static int32_t +cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current) +{ + obj_t *obj; + uint64_t id; + + current = str_number(ctx, 0, ctx->obj_count, &id, current); + if (current <= 0) { + return current; + } + + obj = &ctx->objs[id]; + if (obj->type != arg->obj.type) { + if (obj->type != OBJ_TYPE_NONE) { + return error(EBUSY, "Object is in use"); + } + return error(ENOENT, "Object is not defined"); + } + + arg->obj.ref = obj; + + return current; +} + +static int32_t +cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current) +{ + return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value, + current); +} + +static int32_t +cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current) +{ + return str_token(ctx, arg->str.data, arg->str.size, current); +} + +static int32_t +cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current) +{ + arg_t *arg; + + for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { + switch (arg->type) { + case ARG_TYPE_OBJ: + current = cmd_parse_obj(ctx, arg, current); + break; + case ARG_TYPE_NUM: + current = cmd_parse_num(ctx, arg, current); + break; + case ARG_TYPE_STR: + current = cmd_parse_str(ctx, arg, current); + break; + default: + return error(EINVAL, "Unknown argument type"); + } + } + + if (current < 0) { + return current; + } + + current = str_eol(ctx, current); + if (current <= 0) { + return error(EINVAL, "Syntax error"); + } + + return cmd->handler(ctx, cmd); +} + +static int32_t +cmd_parse(context_t *ctx, command_t *cmds) +{ + char text[32]; + command_t *cmd; + int32_t current; + + cmd = cmds; + do { + current = str_token(ctx, text, sizeof(text), buffer_get(ctx)); + if (current <= 0) { + return current; + } + + while (cmd->name != NULL) { + if (strcmp(cmd->name, text) == 0) { + if (cmd->handler != NULL) { + return cmd_parse_args(ctx, cmd, current); + } + cmd = cmd->cmds; + break; + } + cmd++; + } + } while (cmd->name != NULL); + + str_skip(ctx, current); + + return error(ENOTSUP, "Unknown command"); +} + +static void +cmd_fini(context_t *ctx, command_t *cmds) +{ + command_t *cmd; + arg_t *arg; + + for (cmd = cmds; cmd->name != NULL; cmd++) { + if (cmd->handler == NULL) { + cmd_fini(ctx, cmd->cmds); + } else { + for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { + switch (arg->type) { + case ARG_TYPE_STR: + mem_free(arg->str.data); + arg->str.data = NULL; + break; + default: + break; + } + } + } + } +} + +static bool +cmd_init(context_t *ctx, command_t *cmds) +{ + command_t *cmd; + arg_t *arg; + + for (cmd = cmds; cmd->name != NULL; cmd++) { + if (cmd->handler == NULL) { + if (!cmd_init(ctx, cmd->cmds)) { + return false; + } + } else { + for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { + switch (arg->type) { + case ARG_TYPE_STR: + arg->str.data = mem_alloc(arg->str.size); + if (arg->str.data == NULL) { + return false; + } + break; + default: + break; + } + } + } + } + + return true; +} + +static bool +objs_create(context_t *ctx, uint32_t count) +{ + uint32_t i; + + ctx->objs = mem_alloc(sizeof(obj_t) * count); + if (ctx->objs == NULL) { + return false; + } + ctx->obj_count = count; + + for (i = 0; i < count; i++) { + ctx->objs[i].type = OBJ_TYPE_NONE; + } + + return true; +} + +static int32_t +objs_destroy(context_t *ctx) +{ + uint32_t i; + int32_t err; + + err = 0; + for (i = 0; i < ctx->obj_count; i++) { + if (ctx->objs[i].type != OBJ_TYPE_NONE) { + err = error(ENOTEMPTY, "Objects not destroyed"); + break; + } + } + + mem_free(ctx->objs); + ctx->objs = NULL; + ctx->obj_count = 0; + + return err; +} + +static context_t * +init(size_t size, uint32_t objs, command_t *cmds) +{ + context_t *ctx; + + ctx = mem_alloc(sizeof(context_t)); + if (ctx == NULL) { + goto failed; + } + + if (!buffer_create(ctx, size)) { + goto failed_ctx; + } + + if (!objs_create(ctx, objs)) { + goto failed_buffer; + } + + if (!cmd_init(ctx, cmds)) { + goto failed_objs; + } + + ctx->active = true; + + return ctx; + +failed_objs: + cmd_fini(ctx, cmds); + objs_destroy(ctx); +failed_buffer: + buffer_destroy(ctx); +failed_ctx: + mem_free(ctx); +failed: + return NULL; +} + +static int32_t +fini(context_t *ctx, command_t *cmds) +{ + int32_t ret; + + cmd_fini(ctx, cmds); + buffer_destroy(ctx); + + ret = objs_destroy(ctx); + + ctx->active = false; + + return ret; +} + +static int32_t +exec_quit(context_t *ctx, command_t *cmd) +{ + ctx->active = false; + + return 0; +} + +static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)}, + {"quit", exec_quit, CMD_ARGS()}, + CMD_END}; + +int32_t +main(int32_t argc, char *argv[]) +{ + context_t *ctx; + int32_t res; + + ctx = init(1024, 16, commands); + if (ctx == NULL) { + return 1; + } + + do { + res = cmd_parse(ctx, commands); + if (res < 0) { + out_err(-res); + } + } while (ctx->active); + + res = fini(ctx, commands); + if (res >= 0) { + out_ok(); + return 0; + } + + out_err(-res); + + return 1; +} diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h new file mode 100644 index 00000000000..64e940c78fc --- /dev/null +++ b/tests/basic/open-behind/tester.h @@ -0,0 +1,145 @@ +/* + Copyright (c) 2020 Red Hat, Inc. + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __TESTER_H__ +#define __TESTER_H__ + +#include +#include +#include + +enum _obj_type; +typedef enum _obj_type obj_type_t; + +enum _arg_type; +typedef enum _arg_type arg_type_t; + +struct _buffer; +typedef struct _buffer buffer_t; + +struct _obj; +typedef struct _obj obj_t; + +struct _context; +typedef struct _context context_t; + +struct _arg; +typedef struct _arg arg_t; + +struct _command; +typedef struct _command command_t; + +enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD }; + +enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR }; + +struct _buffer { + char *base; + uint32_t size; + uint32_t len; + uint32_t pos; +}; + +struct _obj { + obj_type_t type; + union { + int32_t fd; + }; +}; + +struct _context { + obj_t *objs; + buffer_t buffer; + uint32_t obj_count; + bool active; +}; + +struct _arg { + arg_type_t type; + union { + struct { + obj_type_t type; + obj_t *ref; + } obj; + struct { + uint64_t value; + uint64_t min; + uint64_t max; + } num; + struct { + uint32_t size; + char *data; + } str; + }; +}; + +struct _command { + const char *name; + int32_t (*handler)(context_t *ctx, command_t *cmd); + union { + arg_t *args; + command_t *cmds; + }; +}; + +#define msg(_stream, _fmt, _args...) \ + do { \ + fprintf(_stream, _fmt "\n", ##_args); \ + fflush(_stream); \ + } while (0) + +#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args) +#define msg_err(_err, _fmt, _args...) \ + ({ \ + int32_t __msg_err = (_err); \ + msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \ + ##_args); \ + -__msg_err; \ + }) + +#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args) +#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args) +#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args) + +#define out_ok(_args...) msg_out("OK " _args) +#define out_err(_err) msg_out("ERR %d", _err) + +#define ARG_END \ + { \ + ARG_TYPE_NONE \ + } + +#define CMD_ARGS1(_x, _args...) \ + .args = (arg_t[]) { _args } +#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END) + +#define CMD_SUB(_cmds) .cmds = _cmds + +#define CMD_END \ + { \ + NULL, NULL, CMD_SUB(NULL) \ + } + +#define ARG_VAL(_type) \ + { \ + ARG_TYPE_OBJ, .obj = {.type = _type } \ + } +#define ARG_NUM(_min, _max) \ + { \ + ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \ + } +#define ARG_STR(_size) \ + { \ + ARG_TYPE_STR, .str = {.size = _size } \ + } + +extern command_t fd_commands[]; + +#endif /* __TESTER_H__ */ \ No newline at end of file diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t index db84a223089..db71cc0f6fe 100644 --- a/tests/bugs/glusterfs/bug-873962-spb.t +++ b/tests/bugs/glusterfs/bug-873962-spb.t @@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off TEST $CLI volume set $V0 performance.write-behind off TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.open-behind off TEST $CLI volume set $V0 cluster.background-self-heal-count 0 TEST $CLI volume start $V0 TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 108c44e4a32..8fac0745e21 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -3410,6 +3410,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg, gf_log("glusterfs-fuse", GF_LOG_TRACE, "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd); + fd_close(state->fd); + fuse_fd_ctx_destroy(this, state->fd); fd_unref(fd); diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h index f25082433f8..0e789177684 100644 --- a/xlators/performance/open-behind/src/open-behind-messages.h +++ b/xlators/performance/open-behind/src/open-behind-messages.h @@ -23,6 +23,10 @@ */ GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED, - OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY); + OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY, + OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE); + +#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop" +#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state" #endif /* _OPEN_BEHIND_MESSAGES_H_ */ diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index cbe89ec82e8..e43fe73bcca 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -16,6 +16,18 @@ #include "open-behind-messages.h" #include +/* Note: The initial design of open-behind was made to cover the simple case + * of open, read, close for small files. This pattern combined with + * quick-read can do the whole operation without a single request to the + * bricks (except the initial lookup). + * + * The way to do this has been improved, but the logic remains the same. + * Basically, this means that any operation sent to the fd or the inode + * that it's not a read, causes the open request to be sent to the + * bricks, and all future operations will be executed synchronously, + * including opens (it's reset once all fd's are closed). + */ + typedef struct ob_conf { gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe e.g - fstat() readv() @@ -32,1096 +44,754 @@ typedef struct ob_conf { */ } ob_conf_t; -typedef struct ob_inode { - inode_t *inode; - struct list_head resume_fops; - struct list_head ob_fds; - int count; - int op_ret; - int op_errno; - gf_boolean_t open_in_progress; - int unlinked; -} ob_inode_t; +/* A negative state represents an errno value negated. In this case the + * current operation cannot be processed. */ +typedef enum _ob_state { + /* There are no opens on the inode or the first open is already + * completed. The current operation can be sent directly. */ + OB_STATE_READY = 0, -typedef struct ob_fd { - call_frame_t *open_frame; - loc_t loc; - dict_t *xdata; - int flags; - int op_errno; - ob_inode_t *ob_inode; - fd_t *fd; - gf_boolean_t opened; - gf_boolean_t ob_inode_fops_waiting; - struct list_head list; - struct list_head ob_fds_on_inode; -} ob_fd_t; + /* There's an open pending and it has been triggered. The current + * operation should be "stubbified" and processed with + * ob_stub_dispatch(). */ + OB_STATE_OPEN_TRIGGERED, -ob_inode_t * -ob_inode_alloc(inode_t *inode) -{ - ob_inode_t *ob_inode = NULL; + /* There's an open pending but it has not been triggered. The current + * operation can be processed directly but using an anonymous fd. */ + OB_STATE_OPEN_PENDING, - ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); - if (ob_inode == NULL) - goto out; + /* The current operation is the first open on the inode. */ + OB_STATE_FIRST_OPEN +} ob_state_t; - ob_inode->inode = inode; - INIT_LIST_HEAD(&ob_inode->resume_fops); - INIT_LIST_HEAD(&ob_inode->ob_fds); -out: - return ob_inode; -} - -void -ob_inode_free(ob_inode_t *ob_inode) -{ - if (ob_inode == NULL) - goto out; +typedef struct ob_inode { + /* List of stubs pending on the first open. Once the first open is + * complete, all these stubs will be resubmitted, and dependencies + * will be checked again. */ + struct list_head resume_fops; - list_del_init(&ob_inode->resume_fops); - list_del_init(&ob_inode->ob_fds); + /* The inode this object references. */ + inode_t *inode; - GF_FREE(ob_inode); -out: - return; -} + /* The fd from the first open sent to this inode. It will be set + * from the moment the open is processed until the open if fully + * executed or closed before actually opened. It's NULL in all + * other cases. */ + fd_t *first_fd; + + /* The stub from the first open operation. When open fop starts + * being processed, it's assigned the OB_OPEN_PREPARING value + * until the actual stub is created. This is necessary to avoid + * creating the stub inside a locked region. Once the stub is + * successfully created, it's assigned here. This value is set + * to NULL once the stub is resumed. */ + call_stub_t *first_open; + + /* The total number of currently open fd's on this inode. */ + int32_t open_count; + + /* This flag is set as soon as we know that the open will be + * sent to the bricks, even before the stub is ready. */ + bool triggered; +} ob_inode_t; -ob_inode_t * -ob_inode_get(xlator_t *this, inode_t *inode) +/* Dummy pointer used temporarily while the actual open stub is being created */ +#define OB_OPEN_PREPARING ((call_stub_t *)-1) + +#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \ + case OB_STATE_FIRST_OPEN: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \ + "fop=%s", #_fop, "state=%d", __ob_state, NULL); \ + default_##_fop##_failure_cbk(_frame, EINVAL); \ + break; \ + case OB_STATE_READY: \ + default_##_fop(_frame, _xl, ##_args); \ + break; \ + case OB_STATE_OPEN_TRIGGERED: { \ + call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \ + ##_args); \ + if (__ob_stub != NULL) { \ + ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + default: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \ + OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \ + default_##_fop##_failure_cbk(_frame, -__ob_state) + +#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + if (!(_trigger)) { \ + fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \ + (_fd)->flags); \ + if (__ob_fd != NULL) { \ + default_##_fop(_frame, _xl, ##_args); \ + fd_unref(__ob_fd); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \ + break; \ + OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_inode( \ + _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +static ob_inode_t * +ob_inode_get_locked(xlator_t *this, inode_t *inode) { ob_inode_t *ob_inode = NULL; uint64_t value = 0; - int ret = 0; - if (!inode) - goto out; + if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) { + return (ob_inode_t *)(uintptr_t)value; + } - LOCK(&inode->lock); - { - __inode_ctx_get(inode, this, &value); - if (value == 0) { - ob_inode = ob_inode_alloc(inode); - if (ob_inode == NULL) - goto unlock; - - value = (uint64_t)(uintptr_t)ob_inode; - ret = __inode_ctx_set(inode, this, &value); - if (ret < 0) { - ob_inode_free(ob_inode); - ob_inode = NULL; - } - } else { - ob_inode = (ob_inode_t *)(uintptr_t)value; + ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); + if (ob_inode != NULL) { + ob_inode->inode = inode; + INIT_LIST_HEAD(&ob_inode->resume_fops); + + value = (uint64_t)(uintptr_t)ob_inode; + if (__inode_ctx_set(inode, this, &value) < 0) { + GF_FREE(ob_inode); + ob_inode = NULL; } } -unlock: - UNLOCK(&inode->lock); -out: return ob_inode; } -ob_fd_t * -__ob_fd_ctx_get(xlator_t *this, fd_t *fd) +static ob_state_t +ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd, + int32_t open_count, bool synchronous, bool trigger, + ob_inode_t **pob_inode, fd_t **pfd) { - uint64_t value = 0; - int ret = -1; - ob_fd_t *ob_fd = NULL; + ob_conf_t *conf; + ob_inode_t *ob_inode; + call_stub_t *open_stub; - ret = __fd_ctx_get(fd, this, &value); - if (ret) - return NULL; + if (inode == NULL) { + return OB_STATE_READY; + } - ob_fd = (void *)((long)value); + conf = xl->private; - return ob_fd; -} + *pfd = NULL; -ob_fd_t * -ob_fd_ctx_get(xlator_t *this, fd_t *fd) -{ - ob_fd_t *ob_fd = NULL; - - LOCK(&fd->lock); + LOCK(&inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - } - UNLOCK(&fd->lock); - - return ob_fd; -} + ob_inode = ob_inode_get_locked(xl, inode); + if (ob_inode == NULL) { + UNLOCK(&inode->lock); -int -__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - uint64_t value = 0; - int ret = -1; + return -ENOMEM; + } + *pob_inode = ob_inode; + + ob_inode->open_count += open_count; + + /* If first_fd is not NULL, it means that there's a previous open not + * yet completed. */ + if (ob_inode->first_fd != NULL) { + *pfd = ob_inode->first_fd; + /* If the current request doesn't trigger the open and it hasn't + * been triggered yet, we can continue without issuing the open + * only if the current request belongs to the same fd as the + * first one. */ + if (!trigger && !ob_inode->triggered && + (ob_inode->first_fd == fd)) { + UNLOCK(&inode->lock); + + return OB_STATE_OPEN_PENDING; + } - value = (long)((void *)ob_fd); + /* We need to issue the open. It could have already been triggered + * before. In this case open_stub will be NULL. Or the initial open + * may not be completely ready yet. In this case open_stub will be + * OB_OPEN_PREPARING. */ + open_stub = ob_inode->first_open; + ob_inode->first_open = NULL; + ob_inode->triggered = true; - ret = __fd_ctx_set(fd, this, value); + UNLOCK(&inode->lock); - return ret; -} + if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) { + call_resume(open_stub); + } -int -ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - int ret = -1; + return OB_STATE_OPEN_TRIGGERED; + } - LOCK(&fd->lock); - { - ret = __ob_fd_ctx_set(this, fd, ob_fd); - } - UNLOCK(&fd->lock); + /* There's no pending open. Only opens can be non synchronous, so all + * regular fops will be processed directly. For non synchronous opens, + * we'll still process them normally (i.e. synchornous) if there are + * more file descriptors open. */ + if (synchronous || (ob_inode->open_count > open_count)) { + UNLOCK(&inode->lock); - return ret; -} + return OB_STATE_READY; + } -ob_fd_t * -ob_fd_new(void) -{ - ob_fd_t *ob_fd = NULL; + *pfd = fd; - ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t); + /* This is the first open. We keep a reference on the fd and set + * first_open stub to OB_OPEN_PREPARING until the actual stub can + * be assigned (we don't create the stub here to avoid doing memory + * allocations inside the mutex). */ + ob_inode->first_fd = __fd_ref(fd); + ob_inode->first_open = OB_OPEN_PREPARING; - INIT_LIST_HEAD(&ob_fd->list); - INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode); + /* If lazy_open is not set, we'll need to immediately send the open, + * so we set triggered right now. */ + ob_inode->triggered = !conf->lazy_open; + } + UNLOCK(&inode->lock); - return ob_fd; + return OB_STATE_FIRST_OPEN; } -void -ob_fd_free(ob_fd_t *ob_fd) +static ob_state_t +ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count, + bool synchronous, bool trigger, ob_inode_t **pob_inode, + fd_t **pfd) { - LOCK(&ob_fd->fd->inode->lock); - { - list_del_init(&ob_fd->ob_fds_on_inode); - } - UNLOCK(&ob_fd->fd->inode->lock); - - loc_wipe(&ob_fd->loc); - - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); + uint64_t err; - if (ob_fd->open_frame) { - /* If we sill have a frame it means that background open has never - * been triggered. We need to release the pending reference. */ - fd_unref(ob_fd->fd); - - STACK_DESTROY(ob_fd->open_frame->root); + if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) { + return (ob_state_t)-err; } - GF_FREE(ob_fd); + return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous, + trigger, pob_inode, pfd); } -int -ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, fd_t *fd_ret, dict_t *xdata) +static ob_state_t +ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode, + fd_t **pfd) { - fd_t *fd = NULL; - int count = 0; - int ob_inode_op_ret = 0; - int ob_inode_op_errno = 0; - ob_fd_t *ob_fd = NULL; - call_stub_t *stub = NULL, *tmp = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t ob_inode_fops_waiting = _gf_false; - struct list_head fops_waiting_on_fd, fops_waiting_on_inode; + bool synchronous; - fd = frame->local; - frame->local = NULL; - - INIT_LIST_HEAD(&fops_waiting_on_fd); - INIT_LIST_HEAD(&fops_waiting_on_inode); + /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't + * we also execute this open synchronously ? */ + synchronous = (flags & O_TRUNC) != 0; - ob_inode = ob_inode_get(this, fd->inode); + return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd); +} - LOCK(&fd->lock); +static int32_t +ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +{ + LOCK(&ob_inode->inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - ob_fd->opened = _gf_true; - - ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting; - - list_splice_init(&ob_fd->list, &fops_waiting_on_fd); - - if (op_ret < 0) { - /* mark fd BAD for ever */ - ob_fd->op_errno = op_errno; - ob_fd = NULL; /*shouldn't be freed*/ - } else { - __fd_ctx_del(fd, this, NULL); - } - } - UNLOCK(&fd->lock); - - if (ob_inode_fops_waiting) { - LOCK(&fd->inode->lock); - { - count = --ob_inode->count; - if (op_ret < 0) { - /* TODO: when to reset the error? */ - ob_inode->op_ret = -1; - ob_inode->op_errno = op_errno; - } - - if (count == 0) { - ob_inode->open_in_progress = _gf_false; - ob_inode_op_ret = ob_inode->op_ret; - ob_inode_op_errno = ob_inode->op_errno; - list_splice_init(&ob_inode->resume_fops, - &fops_waiting_on_inode); - } + /* We only queue a stub if the open has not been completed or + * cancelled. */ + if (ob_inode->first_fd == fd) { + list_add_tail(&stub->list, &ob_inode->resume_fops); + stub = NULL; } - UNLOCK(&fd->inode->lock); - } - - if (ob_fd) - ob_fd_free(ob_fd); - - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list) - { - list_del_init(&stub->list); - - if (op_ret < 0) - call_unwind_error(stub, -1, op_errno); - else - call_resume(stub); } + UNLOCK(&ob_inode->inode->lock); - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list) - { - list_del_init(&stub->list); - - if (ob_inode_op_ret < 0) - call_unwind_error(stub, -1, ob_inode_op_errno); - else - call_resume(stub); + if (stub != NULL) { + call_resume(stub); } - /* The background open is completed. We can release the 'fd' reference. */ - fd_unref(fd); - - STACK_DESTROY(frame->root); - return 0; } -int -ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +static int32_t +ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) { - call_frame_t *frame = NULL; - - if (ob_fd == NULL) { - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; + bool closed; - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - } - unlock: - UNLOCK(&fd->lock); - } else { - LOCK(&fd->lock); - { - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; + LOCK(&ob_inode->inode->lock); + { + closed = ob_inode->first_fd != fd; + if (!closed) { + if (ob_inode->triggered) { + ob_inode->first_open = NULL; + } else { + ob_inode->first_open = stub; + stub = NULL; + } } - UNLOCK(&fd->lock); } + UNLOCK(&ob_inode->inode->lock); - if (frame) { - /* We don't need to take a reference here. We already have a reference - * while the open is pending. */ - frame->local = fd; - - STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, - ob_fd->xdata); + if (stub != NULL) { + if (closed) { + call_stub_destroy(stub); + fd_unref(fd); + } else { + call_resume(stub); + } } return 0; } -void -ob_inode_wake(xlator_t *this, struct list_head *ob_fds) +static void +ob_resume_pending(struct list_head *list) { - ob_fd_t *ob_fd = NULL, *tmp = NULL; + call_stub_t *stub; - if (!list_empty(ob_fds)) { - list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) - { - ob_fd_wake(this, ob_fd->fd, ob_fd); - ob_fd_free(ob_fd); - } - } -} + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); -/* called holding inode->lock and fd->lock */ -void -ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) -{ - if (!src || !dst) - goto out; - - dst->fd = src->fd; - dst->loc.inode = inode_ref(src->loc.inode); - gf_uuid_copy(dst->loc.gfid, src->loc.gfid); - dst->flags = src->flags; - dst->xdata = dict_ref(src->xdata); - dst->ob_inode = src->ob_inode; -out: - return; + call_resume(stub); + } } -int -open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode, - call_stub_t *stub) +static void +ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret, + int32_t op_errno) { - ob_inode_t *ob_inode = NULL; - ob_fd_t *ob_fd = NULL, *tmp = NULL; - gf_boolean_t was_open_in_progress = _gf_false; - gf_boolean_t wait_for_open = _gf_false; - struct list_head ob_fds; + struct list_head list; - ob_inode = ob_inode_get(this, inode); - if (ob_inode == NULL) - goto out; + INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&ob_fds); + if (op_ret < 0) { + fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno); + } - LOCK(&inode->lock); + LOCK(&ob_inode->inode->lock); { - was_open_in_progress = ob_inode->open_in_progress; - ob_inode->unlinked = 1; - - if (was_open_in_progress) { - list_add_tail(&stub->list, &ob_inode->resume_fops); - goto inode_unlock; - } - - list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode) - { - LOCK(&ob_fd->fd->lock); - { - if (ob_fd->opened) - goto fd_unlock; - - ob_inode->count++; - ob_fd->ob_inode_fops_waiting = _gf_true; - - if (ob_fd->open_frame == NULL) { - /* open in progress no need of wake */ - } else { - tmp = ob_fd_new(); - tmp->open_frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - - ob_fd_copy(ob_fd, tmp); - list_add_tail(&tmp->ob_fds_on_inode, &ob_fds); - } - } - fd_unlock: - UNLOCK(&ob_fd->fd->lock); - } - - if (ob_inode->count) { - wait_for_open = ob_inode->open_in_progress = _gf_true; - list_add_tail(&stub->list, &ob_inode->resume_fops); + /* Only update the fields if the file has not been closed before + * getting here. */ + if (ob_inode->first_fd == fd) { + list_splice_init(&ob_inode->resume_fops, &list); + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; } } -inode_unlock: - UNLOCK(&inode->lock); + UNLOCK(&ob_inode->inode->lock); -out: - if (!was_open_in_progress) { - if (!wait_for_open) { - call_resume(stub); - } else { - ob_inode_wake(this, &ob_fds); - } - } + ob_resume_pending(&list); - return 0; + fd_unref(fd); } -int -open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub) +static int32_t +ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int op_errno = 0; - - if (!fd) - goto nofd; - - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; + ob_inode_t *ob_inode; - if (ob_fd->op_errno) { - op_errno = ob_fd->op_errno; - goto unlock; - } + ob_inode = frame->local; + frame->local = NULL; - list_add_tail(&stub->list, &ob_fd->list); - } -unlock: - UNLOCK(&fd->lock); + ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno); -nofd: - if (op_errno) - call_unwind_error(stub, -1, op_errno); - else if (ob_fd) - ob_fd_wake(this, fd, NULL); - else - call_resume(stub); + STACK_DESTROY(frame->root); return 0; } -int -ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, +static int32_t +ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int ret = -1; - ob_conf_t *conf = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t open_in_progress = _gf_false; - int unlinked = 0; - - conf = this->private; - - if (flags & O_TRUNC) { - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; - } - - ob_inode = ob_inode_get(this, fd->inode); - - ob_fd = ob_fd_new(); - if (!ob_fd) - goto enomem; - - ob_fd->ob_inode = ob_inode; - - ob_fd->fd = fd; - - ob_fd->open_frame = copy_frame(frame); - if (!ob_fd->open_frame) - goto enomem; - ret = loc_copy(&ob_fd->loc, loc); - if (ret) - goto enomem; - - ob_fd->flags = flags; - if (xdata) - ob_fd->xdata = dict_ref(xdata); - - LOCK(&fd->inode->lock); - { - open_in_progress = ob_inode->open_in_progress; - unlinked = ob_inode->unlinked; - if (!open_in_progress && !unlinked) { - ret = ob_fd_ctx_set(this, fd, ob_fd); - if (ret) { - UNLOCK(&fd->inode->lock); - goto enomem; - } - - list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds); - } - } - UNLOCK(&fd->inode->lock); - - /* We take a reference while the background open is pending or being - * processed. If we finally wind the request in the foreground, then - * ob_fd_free() will take care of this additional reference. */ - fd_ref(fd); - - if (!open_in_progress && !unlinked) { - STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); - - if (!conf->lazy_open) - ob_fd_wake(this, fd, NULL); - } else { - ob_fd_free(ob_fd); - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - } + STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); return 0; -enomem: - if (ob_fd) { - if (ob_fd->open_frame) - STACK_DESTROY(ob_fd->open_frame->root); - - loc_wipe(&ob_fd->loc); - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); - - GF_FREE(ob_fd); - } - - return -1; } -int +static int32_t ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, dict_t *xdata) { - fd_t *old_fd = NULL; - int ret = -1; - int op_errno = ENOMEM; - call_stub_t *stub = NULL; - - old_fd = fd_lookup(fd->inode, 0); - if (old_fd) { - /* open-behind only when this is the first FD */ - stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); - if (!stub) { - fd_unref(old_fd); - goto err; - } - - open_and_resume(this, old_fd, stub); + ob_inode_t *ob_inode; + call_frame_t *open_frame; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * or the current flags require a synchronous open. */ + return default_open(frame, this, loc, flags, fd, xdata); + } - fd_unref(old_fd); + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. */ + stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } - return 0; + state = -ENOMEM; } - ret = ob_open_behind(frame, this, loc, flags, fd, xdata); - if (ret) { - goto err; - } + if (state == OB_STATE_FIRST_OPEN) { + /* We try to create a stub for the new open. A new frame needs to be + * used because the current one may be destroyed soon after sending + * the open's reply. */ + open_frame = copy_frame(frame); + if (open_frame != NULL) { + stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd, + xdata); + if (stub != NULL) { + open_frame->local = ob_inode; - return 0; -err: - gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s", - loc->path); + /* TODO: Previous version passed xdata back to the caller, but + * probably this doesn't make sense since it won't contain + * any requested data. I think it would be better to pass + * NULL for xdata. */ + default_open_cbk(frame, NULL, this, 0, 0, fd, xdata); - STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0); + return ob_open_dispatch(this, ob_inode, first_fd, stub); + } - return 0; -} + STACK_DESTROY(open_frame->root); + } -fd_t * -ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag) -{ - fd_t *wind_fd = NULL; - ob_fd_t *ob_fd = NULL; - ob_conf_t *conf = NULL; + /* In case of error, simulate a regular completion but with an error + * code. */ + ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM); - conf = this->private; + state = -ENOMEM; + } - ob_fd = ob_fd_ctx_get(this, fd); + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. */ - if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) { - wind_fd = fd_anonymous(fd->inode); - if ((ob_fd->flags & O_DIRECT) && (flag)) - *flag = *flag | O_DIRECT; - } else { - wind_fd = fd_ref(fd); + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; } + UNLOCK(&fd->inode->lock); - return wind_fd; + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "open", "path=%s", loc->path, NULL); + + return default_open_failure_cbk(frame, -state); } -int +static int32_t ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - ob_conf_t *conf = NULL; + ob_conf_t *conf = this->private; + bool trigger = conf->read_after_open || !conf->use_anonymous_fd; - conf = this->private; - - if (!conf->read_after_open) - wind_fd = ob_get_wind_fd(this, fd, &flags); - else - wind_fd = fd_ref(fd); - - stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset, - flags, xdata); - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata); return 0; } -int +static int32_t ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, int count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset, - flags, iobref, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags, + iobref, xdata); return 0; } -int +static int32_t ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - - wind_fd = ob_get_wind_fd(this, fd, NULL); - - stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata); return 0; } -int +static int32_t ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, gf_seek_what_t what, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - - wind_fd = ob_get_wind_fd(this, fd, NULL); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, - xdata); - - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata); return 0; } -int +static int32_t ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - ob_fd_t *ob_fd = NULL; - gf_boolean_t unwind = _gf_false; - - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (ob_fd && ob_fd->open_frame) - /* if open() was never wound to backend, - no need to wind flush() either. - */ - unwind = _gf_true; - } - UNLOCK(&fd->lock); - - if (unwind) - goto unwind; - - stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0); - - return 0; - -unwind: - STACK_UNWIND_STRICT(flush, frame, 0, 0, 0); + OB_POST_FLUSH(this, frame, fd, fd, xdata); return 0; } -int +static int32_t ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata); return 0; } -int +static int32_t ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata); return 0; } -int +static int32_t ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, int flags, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata); return 0; } -int +static int32_t ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, - volume, fd, cmd, flock, xdata); - if (stub) - open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); + OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - call_stub_t *stub = fop_fentrylk_stub( - frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); - if (stub) - open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); + OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type, + xdata); return 0; } -int +static int32_t ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, - optype, xattr, xdata); - if (stub) - open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata); return 0; } -int +static int32_t ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt, int valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata); return 0; } -int +static int32_t ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, - len, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); + OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata); - return 0; -err: - STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); + OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata); - return 0; -err: - STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len, - xdata); - if (!stub) - goto err; + OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata); - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata); return 0; } -int +static int32_t ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, dst->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0); + OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata); return 0; } -int32_t +static int32_t ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, - xdata); - if (!stub) - goto err; + OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid, + xdata); - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int32_t +static int32_t ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - gf_boolean_t access_xattr = _gf_false; - if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) || dict_get(dict, POSIX_ACL_ACCESS_XATTR) || - dict_get(dict, GF_SELINUX_XATTR_KEY)) - access_xattr = _gf_true; - - if (!access_xattr) + dict_get(dict, GF_SELINUX_XATTR_KEY)) { return default_setxattr(frame, this, loc, dict, flags, xdata); + } - stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, - xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); + OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags, + xdata); - return 0; -err: - STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); return 0; } -int -ob_release(xlator_t *this, fd_t *fd) +static void +ob_fdclose(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; + struct list_head list; + ob_inode_t *ob_inode; + call_stub_t *stub; + + INIT_LIST_HEAD(&list); + stub = NULL; - ob_fd = ob_fd_ctx_get(this, fd); + LOCK(&fd->inode->lock); + { + ob_inode = ob_inode_get_locked(this, fd->inode); + if (ob_inode != NULL) { + ob_inode->open_count--; + + /* If this fd is the same as ob_inode->first_fd, it means that + * the initial open has not fully completed. We'll try to cancel + * it. */ + if (ob_inode->first_fd == fd) { + if (ob_inode->first_open == OB_OPEN_PREPARING) { + /* In this case ob_open_dispatch() has not been called yet. + * We clear first_fd and first_open to allow that function + * to know that the open is not really needed. This also + * allows other requests to work as expected if they + * arrive before the dispatch function is called. If there + * are pending fops, we can directly process them here. + * (note that there shouldn't be any fd related fops, but + * if there are, it's fine if they fail). */ + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; + list_splice_init(&ob_inode->resume_fops, &list); + } else if (!ob_inode->triggered) { + /* If the open has already been dispatched, we can only + * cancel it if it has not been triggered. Otherwise we + * simply wait until it completes. While it's not triggered, + * first_open must be a valid stub and there can't be any + * pending fops. */ + GF_ASSERT((ob_inode->first_open != NULL) && + list_empty(&ob_inode->resume_fops)); + + ob_inode->first_fd = NULL; + stub = ob_inode->first_open; + ob_inode->first_open = NULL; + } + } + } + } + UNLOCK(&fd->inode->lock); - ob_fd_free(ob_fd); + if (stub != NULL) { + call_stub_destroy(stub); + fd_unref(fd); + } - return 0; + ob_resume_pending(&list); } int ob_forget(xlator_t *this, inode_t *inode) { - ob_inode_t *ob_inode = NULL; + ob_inode_t *ob_inode; uint64_t value = 0; - inode_ctx_del(inode, this, &value); - - if (value) { + if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { ob_inode = (ob_inode_t *)(uintptr_t)value; - ob_inode_free(ob_inode); + GF_FREE(ob_inode); } return 0; @@ -1153,20 +823,18 @@ ob_priv_dump(xlator_t *this) int ob_fdctx_dump(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - int ret = 0; + uint64_t value = 0; + int ret = 0, error = 0; ret = TRY_LOCK(&fd->lock); if (ret) return 0; - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) { - UNLOCK(&fd->lock); - return 0; + if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) { + error = (int32_t)value; } gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind", @@ -1175,17 +843,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd) gf_proc_dump_write("fd", "%p", fd); - gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame); - - if (ob_fd->open_frame) - gf_proc_dump_write("open_frame.root.unique", "%" PRIu64, - ob_fd->open_frame->root->unique); - - gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path); - - gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid)); - - gf_proc_dump_write("flags", "%d", ob_fd->flags); + gf_proc_dump_write("error", "%d", error); UNLOCK(&fd->lock); @@ -1307,7 +965,7 @@ struct xlator_fops fops = { }; struct xlator_cbks cbks = { - .release = ob_release, + .fdclose = ob_fdclose, .forget = ob_forget, }; -- cgit