summary | refs | log | tree | commit | diff | stats
path: root/xlators/mount/fuse
diff options
context:
space:
mode:
author: Anand Avati <avati@gluster.com> 2012-01-13 13:27:15 +0530
committer: Anand Avati <avati@gluster.com> 2012-01-20 05:03:42 -0800
commit: 7e1f8e3bac201f88e2d9ef62fc69a044716dfced (patch)
tree: 77540dbf1def2c864f8ae55f2293dba4a1d47488 /xlators/mount/fuse
parent: 33c568ce1a28c1739f095611b40b7acf40e4e6df (diff)
core: GFID filehandle based backend and anonymous FDs
1. What
--------
This change introduces an infrastructure change in the filesystem which lets filesystem operations address objects (inodes) just by their GFID. Thus far GFID has been a unique identifier of a user-visible inode. But in terms of addressability the only mechanism thus far has been the backend filesystem path, which could be derived from the GFID only if it was cached in the inode table along with the entire set of dentry ancestry leading up to the root. This change essentially decouples addressability from the namespace. It is no longer necessary to be aware of the parent directory to address a file or directory.

2. Why
-------
The biggest use case for such a feature is NFS for generating persistent filehandles. So far the technique for generating filehandles in NFS has been to encode path components so that the appropriate inode_t can be repopulated into the inode table by means of a recursive lookup of each component top-down. Another use case is the ability to perform more intelligent self-healing and rebalancing of inodes with hardlinks and also to detect renames. A derived feature from GFID filehandles is anonymous FDs. An anonymous FD is an internal USABLE "fd_t" which does not map to a user opened file descriptor or to an internal ->open()'d fd. The ability to address a file by the GFID eliminates the need to have a persistent ->open()'d fd for the purpose of avoiding the namespace. This improves NFS read/write performance significantly, eliminating open/close calls, and also fixes some of today's limitations (like keeping an FD open longer than necessary resulting in disk space leakage).

3. How
-------
At each storage/posix translator level, every file is hardlinked inside a hidden .glusterfs directory (under the top level export) with the name being the ASCII-encoded standard UUID format string.
For reasons of performance and scalability there is a two-tier classification of those hardlinks under directories with the initial parts of the UUID string as the directory names. For directories (which cannot be hardlinked), the approach is to use a symlink which dereferences the parent GFID path along with the basename of the directory. The parent GFID dereference will in turn be a dereference of the grandparent with the parent's basename, and so on recursively up to the root export.

4. Development
---------------
4a. To leverage the ability to address an inode by its GFID, the technique is to perform a "nameless lookup". This means populating a loc_t structure as:

loc_t {
    pargfid: NULL
    parent: NULL
    name: NULL
    path: NULL
    gfid: GFID to be looked up [out parameter]
    inode: inode_new () result [in parameter]
}

and performing such a lookup will return in its callback an inode_t populated with the right contexts and a struct iatt which can be used to perform an inode_link () on the inode (without a parent and basename). The inode will now be hashed and linked in the inode table and findable via inode_find(). A fundamental change moving forward is that the primary fields in a loc_t structure are now going to be (pargfid, name) and (gfid) depending on the kind of FOP. So far, path has been the primary field for operations. The remaining fields only serve as hints/helpers.

4b. If read/write is to be performed on an inode_t, the approach so far has been to: fd_create(), STACK_WIND(open, fd), fd_bind (in callback) and then perform STACK_WIND(read, fd) etc. With anonymous fds, you can now do fd_anonymous (inode), STACK_WIND (read, fd). This results in a great boost in performance in the built-in NFS server.

5. Misc
-------
The inode_ctx_put[2] has been renamed to inode_ctx_set[2] to be consistent with the rest of the codebase.
Change-Id: Ie4629edf6bd32a595f4d7f01e90c0a01f16fb12f
BUG: 781318
Reviewed-on: http://review.gluster.com/669
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@gluster.com>
Diffstat (limited to 'xlators/mount/fuse')
-rw-r--r--  xlators/mount/fuse/src/fuse-bridge.c     1
-rw-r--r--  xlators/mount/fuse/src/fuse-bridge.h     7
-rw-r--r--  xlators/mount/fuse/src/fuse-helpers.c   15
-rw-r--r--  xlators/mount/fuse/src/fuse-resolve.c  400
4 files changed, 127 insertions, 296 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 8c1cd8f7568..b8f53a1bc3c 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -3378,6 +3378,7 @@ fuse_first_lookup (xlator_t *this)
loc.path = "/";
loc.name = "";
loc.inode = fuse_ino_to_inode (1, this);
+ uuid_copy (loc.gfid, loc.inode->gfid);
loc.parent = NULL;
dict = dict_new ();
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index 39b54f6fe32..ae764a7bccc 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -148,7 +148,9 @@ typedef struct fuse_private fuse_private_t;
state->finh->unique, \
state->finh->opcode); \
free_fuse_state (state); \
- return; \
+ /* ideally, need to 'return', but let the */ \
+ /* calling function take care of it */ \
+ break; \
} \
\
frame->root->state = state; \
@@ -165,6 +167,7 @@ typedef struct fuse_private fuse_private_t;
} else { \
STACK_WIND (frame, ret, xl, xl->fops->fop, args); \
} \
+ \
} while (0)
@@ -242,7 +245,7 @@ typedef struct {
char *resolved;
int op_ret;
int op_errno;
- loc_t deep_loc;
+ loc_t resolve_loc;
struct fuse_resolve_comp *components;
int comp_count;
} fuse_resolve_t;
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
index 941907cea8b..9bf85f979c3 100644
--- a/xlators/mount/fuse/src/fuse-helpers.c
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -68,7 +68,7 @@ fuse_resolve_wipe (fuse_resolve_t *resolve)
if (resolve->resolved)
GF_FREE ((void *)resolve->resolved);
- loc_wipe (&resolve->deep_loc);
+ loc_wipe (&resolve->resolve_loc);
comp = resolve->components;
@@ -321,6 +321,8 @@ fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
if (!parent) {
parent = fuse_ino_to_inode (par, state->this);
loc->parent = parent;
+ if (parent)
+ uuid_copy (loc->pargfid, parent->gfid);
}
inode = loc->inode;
@@ -342,16 +344,17 @@ fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
if (!inode) {
inode = fuse_ino_to_inode (ino, state->this);
loc->inode = inode;
+ if (inode)
+ uuid_copy (loc->gfid, inode->gfid);
}
parent = loc->parent;
if (!parent) {
- parent = fuse_ino_to_inode (par, state->this);
- if (!parent) {
- parent = inode_parent (inode, null_gfid, NULL);
- }
-
+ parent = inode_parent (inode, null_gfid, NULL);
loc->parent = parent;
+ if (parent)
+ uuid_copy (loc->pargfid, parent->gfid);
+
}
ret = inode_path (inode, NULL, &path);
diff --git a/xlators/mount/fuse/src/fuse-resolve.c b/xlators/mount/fuse/src/fuse-resolve.c
index 33606f87919..755e2f429f1 100644
--- a/xlators/mount/fuse/src/fuse-resolve.c
+++ b/xlators/mount/fuse/src/fuse-resolve.c
@@ -26,375 +26,203 @@
static int
fuse_resolve_all (fuse_state_t *state);
-static int
-fuse_resolve_path_simple (fuse_state_t *state);
-
-static int
-component_count (const char *path)
-{
- int count = 0;
- const char *trav = NULL;
-
- for (trav = path; *trav; trav++) {
- if (*trav == '/')
- count++;
- }
-
- return count + 2;
-}
-
-static int
-prepare_components (fuse_state_t *state)
-{
- fuse_resolve_t *resolve = NULL;
- char *resolved = NULL;
- struct fuse_resolve_comp *components = NULL;
- char *trav = NULL;
- int count = 0;
- int i = 0;
-
- resolve = state->resolve_now;
-
- resolved = gf_strdup (resolve->path);
- resolve->resolved = resolved;
-
- count = component_count (resolve->path);
- components = GF_CALLOC (sizeof (*components), count, 0); //TODO
- if (!components)
- goto out;
- resolve->components = components;
-
- components[0].basename = "";
- components[0].ino = 1;
- components[0].gen = 0;
- components[0].inode = inode_ref (state->itable->root);
-
- i = 1;
- for (trav = resolved; *trav; trav++) {
- if (*trav == '/') {
- components[i].basename = trav + 1;
- *trav = 0;
- i++;
- }
- }
-out:
- return 0;
-}
+int fuse_resolve_continue (fuse_state_t *state);
+int fuse_resolve_entry_simple (fuse_state_t *state);
+int fuse_resolve_inode_simple (fuse_state_t *state);
static int
fuse_resolve_loc_touchup (fuse_state_t *state)
{
fuse_resolve_t *resolve = NULL;
- loc_t *loc = NULL;
- char *path = NULL;
- int ret = 0;
+ loc_t *loc = NULL;
+ char *path = NULL;
+ int ret = 0;
resolve = state->resolve_now;
loc = state->loc_now;
if (!loc->path) {
- if (loc->parent) {
+ if (loc->parent && resolve->bname) {
ret = inode_path (loc->parent, resolve->bname, &path);
} else if (loc->inode) {
ret = inode_path (loc->inode, NULL, &path);
}
if (ret)
- gf_log ("", GF_LOG_TRACE,
+ gf_log (THIS->name, GF_LOG_TRACE,
"return value inode_path %d", ret);
-
- if (!path)
- path = gf_strdup (resolve->path);
-
loc->path = path;
}
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
-
- if (!loc->parent && loc->inode) {
- loc->parent = inode_parent (loc->inode, 0, NULL);
- }
-
return 0;
}
-static int
-fuse_resolve_newfd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+
+int
+fuse_resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
{
fuse_state_t *state = NULL;
fuse_resolve_t *resolve = NULL;
- fd_t *old_fd = NULL;
- fd_t *tmp_fd = NULL;
- fuse_fd_ctx_t *tmp_fd_ctx = 0;
- uint64_t val = 0;
- int ret = 0;
+ inode_t *link_inode = NULL;
+ loc_t *resolve_loc = NULL;
state = frame->root->state;
resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
STACK_DESTROY (frame->root);
if (op_ret == -1) {
- resolve->op_ret = -1;
- resolve->op_errno = op_errno;
+ gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "%s/%s: failed to resolve (%s)",
+ uuid_utoa (resolve_loc->pargfid), resolve_loc->name,
+ strerror (op_errno));
goto out;
}
- old_fd = resolve->fd;
-
- state->fd = fd_ref (fd);
-
- fd_bind (fd);
+ link_inode = inode_link (inode, resolve_loc->parent,
+ resolve_loc->name, buf);
- resolve->fd = NULL;
+ if (!link_inode)
+ goto out;
- LOCK (&old_fd->lock);
- {
- ret = __fd_ctx_get (old_fd, state->this, &val);
- if (!ret) {
- tmp_fd_ctx = (fuse_fd_ctx_t *)(unsigned long)val;
- tmp_fd = tmp_fd_ctx->fd;
- if (tmp_fd) {
- fd_unref (tmp_fd);
- tmp_fd_ctx->fd = NULL;
- }
- } else {
- tmp_fd_ctx = __fuse_fd_ctx_check_n_create (old_fd,
- state->this);
- }
+ inode_lookup (link_inode);
- if (tmp_fd_ctx) {
- tmp_fd_ctx->fd = fd;
- } else {
- gf_log ("resolve", GF_LOG_WARNING,
- "failed to set the fd ctx with resolved fd");
- }
- }
- UNLOCK (&old_fd->lock);
+ inode_unref (link_inode);
out:
- fuse_resolve_all (state);
- return 0;
-}
-
-static void
-fuse_resolve_new_fd (fuse_state_t *state)
-{
- fuse_resolve_t *resolve = NULL;
- fd_t *new_fd = NULL;
- fd_t *fd = NULL;
-
- resolve = state->resolve_now;
- fd = resolve->fd;
-
- new_fd = fd_create (state->loc.inode, state->finh->pid);
- new_fd->flags = (fd->flags & ~O_TRUNC);
-
- gf_log ("resolve", GF_LOG_DEBUG,
- "%"PRIu64": OPEN %s", state->finh->unique,
- state->loc.path);
-
- FUSE_FOP (state, fuse_resolve_newfd_cbk, GF_FOP_OPEN,
- open, &state->loc, new_fd->flags, new_fd, 0);
-}
-
-static int
-fuse_resolve_deep_continue (fuse_state_t *state)
-{
- fuse_resolve_t *resolve = NULL;
- int ret = 0;
-
- resolve = state->resolve_now;
-
- resolve->op_ret = 0;
- resolve->op_errno = 0;
-
- if (resolve->path)
- ret = fuse_resolve_path_simple (state);
- if (ret)
- gf_log ("resolve", GF_LOG_TRACE,
- "return value of resolve_*_simple %d", ret);
-
- fuse_resolve_loc_touchup (state);
-
- /* This function is called by either fd resolve or inode resolve */
- if (!resolve->fd)
- fuse_resolve_all (state);
- else
- fuse_resolve_new_fd (state);
+ loc_wipe (resolve_loc);
+ fuse_resolve_continue (state);
return 0;
}
-static int
-fuse_resolve_deep_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+int
+fuse_resolve_gfid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent)
{
- fuse_state_t *state = NULL;
- fuse_resolve_t *resolve = NULL;
- struct fuse_resolve_comp *components = NULL;
- inode_t *link_inode = NULL;
- int i = 0;
+ fuse_state_t *state = NULL;
+ fuse_resolve_t *resolve = NULL;
+ inode_t *link_inode = NULL;
+ loc_t *resolve_loc = NULL;
state = frame->root->state;
resolve = state->resolve_now;
- components = resolve->components;
-
- i = (long) cookie;
+ resolve_loc = &resolve->resolve_loc;
STACK_DESTROY (frame->root);
if (op_ret == -1) {
- goto get_out_of_here;
+ gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "%s: failed to resolve (%s)",
+ uuid_utoa (resolve_loc->gfid), strerror (op_errno));
+ loc_wipe (&resolve->resolve_loc);
+ goto out;
}
- if (i != 0) {
- /* no linking for root inode */
- link_inode = inode_link (inode, resolve->deep_loc.parent,
- resolve->deep_loc.name, buf);
- components[i].inode = link_inode;
- link_inode = NULL;
- }
+ loc_wipe (resolve_loc);
- loc_wipe (&resolve->deep_loc);
- i++; /* next component */
+ link_inode = inode_link (inode, NULL, NULL, buf);
- if (!components[i].basename) {
- /* all components of the path are resolved */
- goto get_out_of_here;
+ if (!link_inode)
+ goto out;
+
+ inode_lookup (link_inode);
+
+ if (uuid_is_null (resolve->pargfid)) {
+ inode_unref (link_inode);
+ goto out;
}
- /* join the current component with the path resolved until now */
- *(components[i].basename - 1) = '/';
+ resolve_loc->parent = link_inode;
+ uuid_copy (resolve_loc->pargfid, resolve_loc->parent->gfid);
- resolve->deep_loc.path = gf_strdup (resolve->resolved);
- resolve->deep_loc.parent = inode_ref (components[i-1].inode);
- resolve->deep_loc.inode = inode_new (state->itable);
- resolve->deep_loc.name = components[i].basename;
+ resolve_loc->name = resolve->bname;
- FUSE_FOP_COOKIE (state, state->itable->xl, fuse_resolve_deep_cbk,
- (void *)(long)i,
- GF_FOP_LOOKUP, lookup, &resolve->deep_loc, NULL);
- return 0;
+ resolve_loc->inode = inode_new (state->itable);
+ inode_path (resolve_loc->parent, resolve_loc->name,
+ (char **) &resolve_loc->path);
+
+ FUSE_FOP (state, fuse_resolve_gfid_entry_cbk, GF_FOP_LOOKUP,
+ lookup, &resolve->resolve_loc, NULL);
-get_out_of_here:
- fuse_resolve_deep_continue (state);
+ return 0;
+out:
+ fuse_resolve_continue (state);
return 0;
}
-static int
-fuse_resolve_path_deep (fuse_state_t *state)
+int
+fuse_resolve_gfid (fuse_state_t *state)
{
- fuse_resolve_t *resolve = NULL;
- struct fuse_resolve_comp *components = NULL;
- inode_t *inode = NULL;
- long i = 0;
+ fuse_resolve_t *resolve = NULL;
+ loc_t *resolve_loc = NULL;
+ int ret = 0;
resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
- prepare_components (state);
-
- components = resolve->components;
-
- /* start from the root */
- for (i = 1; components[i].basename; i++) {
- *(components[i].basename - 1) = '/';
- inode = inode_grep (state->itable, components[i-1].inode,
- components[i].basename);
- if (!inode)
- break;
- components[i].inode = inode;
+ if (!uuid_is_null (resolve->pargfid)) {
+ uuid_copy (resolve_loc->gfid, resolve->pargfid);
+ resolve_loc->inode = inode_new (state->itable);
+ ret = inode_path (resolve_loc->inode, NULL,
+ (char **)&resolve_loc->path);
+ } else if (!uuid_is_null (resolve->gfid)) {
+ uuid_copy (resolve_loc->gfid, resolve->gfid);
+ resolve_loc->inode = inode_new (state->itable);
+ ret = inode_path (resolve_loc->inode, NULL,
+ (char **)&resolve_loc->path);
+ }
+ if (ret <= 0) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to get the path from inode %s",
+ uuid_utoa (resolve->gfid));
}
- if (!components[i].basename)
- goto resolved;
-
- resolve->deep_loc.path = gf_strdup (resolve->resolved);
- resolve->deep_loc.parent = inode_ref (components[i-1].inode);
- resolve->deep_loc.inode = inode_new (state->itable);
- resolve->deep_loc.name = components[i].basename;
-
- FUSE_FOP_COOKIE (state, state->itable->xl, fuse_resolve_deep_cbk,
- (void *)(long)i,
- GF_FOP_LOOKUP, lookup, &resolve->deep_loc, NULL);
+ FUSE_FOP (state, fuse_resolve_gfid_cbk, GF_FOP_LOOKUP,
+ lookup, &resolve->resolve_loc, NULL);
return 0;
-resolved:
- fuse_resolve_deep_continue (state);
- return 0;
}
-static int
-fuse_resolve_path_simple (fuse_state_t *state)
+int
+fuse_resolve_continue (fuse_state_t *state)
{
- fuse_resolve_t *resolve = NULL;
- struct fuse_resolve_comp *components = NULL;
- int ret = -1;
- int par_idx = 0;
- int ino_idx = 0;
- int i = 0;
+ fuse_resolve_t *resolve = NULL;
+ int ret = 0;
resolve = state->resolve_now;
- components = resolve->components;
-
- if (!components) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
-
- for (i = 0; components[i].basename; i++) {
- par_idx = ino_idx;
- ino_idx = i;
- }
-
- if (!components[par_idx].inode) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
-
- if (!components[ino_idx].inode &&
- (resolve->type == RESOLVE_MUST || resolve->type == RESOLVE_EXACT)) {
- resolve->op_ret = -1;
- resolve->op_errno = ENOENT;
- goto out;
- }
-
- if (components[ino_idx].inode && resolve->type == RESOLVE_NOT) {
- resolve->op_ret = -1;
- resolve->op_errno = EEXIST;
- goto out;
- }
- if (components[ino_idx].inode) {
- if (state->loc_now->inode) {
- inode_unref (state->loc_now->inode);
- }
-
- state->loc_now->inode = inode_ref (components[ino_idx].inode);
- }
+ resolve->op_ret = 0;
+ resolve->op_errno = 0;
- if (state->loc_now->parent) {
- inode_unref (state->loc_now->parent);
- }
+ /* TODO: should we handle 'fd' here ? */
+ if (!uuid_is_null (resolve->pargfid))
+ ret = fuse_resolve_entry_simple (state);
+ else if (!uuid_is_null (resolve->gfid))
+ ret = fuse_resolve_inode_simple (state);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "return value of resolve_*_simple %d", ret);
- state->loc_now->parent = inode_ref (components[par_idx].inode);
+ fuse_resolve_loc_touchup (state);
- ret = 0;
+ fuse_resolve_all (state);
-out:
- return ret;
+ return 0;
}
+
/*
Check if the requirements are fulfilled by entries in the inode cache itself
Return value:
@@ -445,6 +273,7 @@ fuse_resolve_entry_simple (fuse_state_t *state)
}
state->loc_now->inode = inode_ref (inode);
+ uuid_copy (state->loc_now->gfid, resolve->gfid);
out:
if (parent)
@@ -468,7 +297,7 @@ fuse_resolve_entry (fuse_state_t *state)
ret = fuse_resolve_entry_simple (state);
if (ret > 0) {
loc_wipe (loc);
- fuse_resolve_path_deep (state);
+ fuse_resolve_gfid (state);
return 0;
}
@@ -505,6 +334,7 @@ fuse_resolve_inode_simple (fuse_state_t *state)
}
state->loc_now->inode = inode_ref (inode);
+ uuid_copy (state->loc_now->gfid, resolve->gfid);
out:
if (inode)
@@ -526,7 +356,7 @@ fuse_resolve_inode (fuse_state_t *state)
if (ret > 0) {
loc_wipe (loc);
- fuse_resolve_path_deep (state);
+ fuse_resolve_gfid (state);
return 0;
}
@@ -574,8 +404,6 @@ fuse_resolve_fd (fuse_state_t *state)
state->loc_now = &state->loc;
- fuse_resolve_path_deep (state);
-
out:
return 0;
}
@@ -600,10 +428,6 @@ fuse_resolve (fuse_state_t *state)
fuse_resolve_inode (state);
- } else if (resolve->path) {
-
- fuse_resolve_path_deep (state);
-
} else {
resolve->op_ret = 0;