summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-common.h
diff options
context:
space:
mode:
authorKotresh HR <khiremat@redhat.com>2017-01-03 02:35:06 -0500
committerRaghavendra G <rgowdapp@redhat.com>2017-04-26 09:00:34 +0000
commit4076b73b2f4fb3cca0737974b124f33f76f9c9c1 (patch)
treecff52055113fd04c28d5a99719036d59522a51ff /xlators/cluster/dht/src/dht-common.h
parent1538c98f5e33e0794830d5153f17a96ff28c9914 (diff)
feature/dht: Directory synchronization
Design doc: https://review.gluster.org/16876 Directory creation is now synchronized with blocking inodelk of the parent on the hashed subvolume followed by the entrylk on the hashed subvolume between dht_mkdir, dht_rmdir, dht_rename_dir and lookup selfheal mkdir. To maintain internal consistency of directories across all subvols of dht, we need locks. Specifically we are interested in: 1. Consistency of layout of a directory. Only one writer should modify the layout at a time. A writer (layout setting during directory heal as part of lookup) shouldn't modify the layout while there are readers (all other fops like create, mkdir etc., which consume layout) and readers shouldn't read the layout while a writer is in progress. Readers can read the layout simultaneously. Writer takes a WRITE inodelk on the directory (whose layout is being modified) across ALL subvols. Reader takes a READ inodelk on the directory (whose layout is being read) on ANY subvol. 2. Consistency of directory namespace across subvols. The path and associated gfid should be same on all subvols. A gfid should not be associated with more than one path on any subvol. All fops that can change directory names (mkdir, rmdir, renamedir, directory creation phase in lookup-heal) takes an entrylk on hashed subvol of the directory. NOTE1: In point 2 above, since dht takes entrylk on hashed subvol of a directory, the transaction itself is a consumer of layout on parent directory. So, the transaction is a reader of parent layout and does an inodelk on parent directory just like any other layout reader. So a mkdir (dir/subdir) would: > Acquire a READ inodelk on "dir" on any subvol. > Acquire an entrylk (dir, "subdir") on hashed subvol of "subdir". > creates directory on hashed subvol and possibly on non-hashed subvols. > UNLOCK (entrylk) > UNLOCK (inodelk) NOTE2: mkdir fop while setting the layout of the directory being created is considered as a reader, but NOT a writer. The reason is for a fop which can consume the layout of a directory to come either of the following conditions has to be true: > mkdir syscall from application has to complete. In this case no need of synchronization. > A lookup issued on the directory racing with mkdir has to complete. Since layout setting by a lookup is considered as a writer, only one of either mkdir or lookup will set the layout. Code re-organization: All the lock related routines are moved to "dht-lock.c" file. New wrapper function is introduced to take blocking inodelk followed by entrylk 'dht_protect_namespace' Updates #191 Change-Id: I01569094dfbe1852de6f586475be79c1ba965a31 Signed-off-by: Kotresh HR <khiremat@redhat.com> BUG: 1443373 Reviewed-on: https://review.gluster.org/15472 NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.h')
-rw-r--r--xlators/cluster/dht/src/dht-common.h103
1 files changed, 51 insertions, 52 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 0e082e35c57..21433b6c8b7 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -30,7 +30,10 @@
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
+/* Layout synchronization */
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
+/* Namespace synchronization */
+#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
@@ -113,6 +116,11 @@ typedef enum {
DHT_HASH_TYPE_DM_USER,
} dht_hashfn_type_t;
+typedef enum {
+ DHT_INODELK,
+ DHT_ENTRYLK,
+} dht_lock_type_t;
+
/* rebalance related */
struct dht_rebalance_ {
xlator_t *from_subvol;
@@ -166,10 +174,52 @@ typedef struct {
char *domain; /* Only locks within a single domain
* contend with each other
*/
+ char *basename; /* Required for entrylk */
gf_lkowner_t lk_owner;
gf_boolean_t locked;
} dht_lock_t;
+/* The lock structure represents inodelk. */
+typedef struct {
+ fop_inodelk_cbk_t inodelk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_ilock_wrap_t;
+
+/* The lock structure represents entrylk. */
+typedef struct {
+ fop_entrylk_cbk_t entrylk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_elock_wrap_t;
+
+/* The first member of dht_dir_transaction_t should be of type dht_ilock_wrap_t.
+ * Otherwise it can result in subtle memory corruption issues as in most of the
+ * places we use lock[0].layout.my_layout or lock[0].layout.parent_layout and
+ * lock[0].ns.parent_layout (like in dht_local_wipe).
+ */
+typedef union {
+ union {
+ dht_ilock_wrap_t my_layout;
+ dht_ilock_wrap_t parent_layout;
+ } layout;
+ struct dht_namespace {
+ dht_ilock_wrap_t parent_layout;
+ dht_elock_wrap_t directory_ns;
+ fop_entrylk_cbk_t ns_cbk;
+ } ns;
+} dht_dir_transaction_t;
+
typedef
int (*dht_selfheal_layout_t)(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout);
@@ -288,16 +338,7 @@ struct dht_local {
struct dht_skip_linkto_unlink skip_unlink;
- struct {
- fop_inodelk_cbk_t inodelk_cbk;
- dht_lock_t **locks;
- int lk_count;
- dht_reaction_type_t reaction;
-
- /* whether locking failed on _any_ of the "locks" above */
- int op_ret;
- int op_errno;
- } lock;
+ dht_dir_transaction_t lock[2], *current;
short lock_type;
@@ -1187,47 +1228,6 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this);
int
dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict);
-
-/* Acquire non-blocking inodelk on a list of xlators.
- *
- * @lk_array: array of lock requests lock on.
- *
- * @lk_count: number of locks in @lk_array
- *
- * @inodelk_cbk: will be called after inodelk replies are received
- *
- * @retval: -1 if stack_winding inodelk fails. 0 otherwise.
- * inodelk_cbk is called with appropriate error on errors.
- * On failure to acquire lock on all members of list, successful
- * locks are unlocked before invoking cbk.
- */
-
-int
-dht_nonblocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, fop_inodelk_cbk_t inodelk_cbk);
-
-/* same as dht_nonblocking_inodelk, but issues sequential blocking locks on
- * @lk_array directly. locks are issued on some order which remains same
- * for a list of xlators (irrespective of order of xlators within list).
- */
-int
-dht_blocking_inodelk (call_frame_t *frame, dht_lock_t **lk_array,
- int lk_count, dht_reaction_type_t reaction,
- fop_inodelk_cbk_t inodelk_cbk);
-
-int32_t
-dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
- fop_inodelk_cbk_t inodelk_cbk);
-
-dht_lock_t *
-dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type,
- const char *domain);
-void
-dht_lock_array_free (dht_lock_t **lk_array, int count);
-
-int32_t
-dht_lock_count (dht_lock_t **lk_array, int lk_count);
-
int
dht_layout_sort (dht_layout_t *layout);
@@ -1291,5 +1291,4 @@ getChoices (const char *value);
int
dht_aggregate_split_brain_xattr (dict_t *dst, char *key, data_t *value);
-
#endif/* _DHT_H */