summaryrefslogtreecommitdiffstats
path: root/xlators/experimental/jbr-server/src/jbr-internal.h
blob: f225e988a5fdd4de5d8444becbbab783133e7774 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/*
   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
   This file is part of GlusterFS.

   This file is licensed to you under your choice of the GNU Lesser
   General Public License, version 3 or any later version (LGPLv3 or
   later), or the GNU General Public License, version 2 (GPLv2), in all
   cases as published by the Free Software Foundation.
*/

#include <sys/stat.h>
#include <sys/types.h>

/*
 * Extended-attribute name used by this translator.
 * NOTE(review): presumably carries leader state; confirm against the users
 * of this macro, which are not visible in this header.
 */
#define LEADER_XATTR "user.jbr.leader"
/*
 * Yields the xlator stored in the second entry of xl's children list.
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "base + 1" or "&array[i]") expands correctly.  No NULL checks are
 * performed: the caller must guarantee that at least two children exist.
 */
#define SECOND_CHILD(xl) ((xl)->children->next->xlator)
/*
 * Path of the reconciler script, built by string-literal concatenation.
 * JBR_SCRIPT_PREFIX is not defined in this header and must be supplied
 * elsewhere (NOTE(review): presumably by the build system).
 */
#define RECONCILER_PATH JBR_SCRIPT_PREFIX "/reconciler.py"
/* Size in bytes of the term_buf array in jbr_private_t. */
#define CHANGELOG_ENTRY_SIZE 128

/*
 * Type identifiers private to this translator.  Numbering starts right
 * after gf_common_mt_end so the values do not collide with the common
 * identifiers; gf_mt_jbr_end is the terminating sentinel, one past the
 * last real identifier.
 * NOTE(review): presumably these are memory-accounting tags following the
 * GlusterFS gf_mt_* convention - confirm against the allocation sites.
 */
enum {
    gf_mt_jbr_private_t = gf_common_mt_end + 1,
    gf_mt_jbr_fd_ctx_t,
    gf_mt_jbr_inode_ctx_t,
    gf_mt_jbr_dirty_t,
    gf_mt_jbr_end
};

/*
 * Kinds of reconciler notification event.  The numeric values are spelled
 * out explicitly and start at 1, so 0 never names a valid event.
 */
enum jbr_recon_notify_ev_id_t {
    JBR_RECON_SET_LEADER = 1, /* leader-related notification */
    JBR_RECON_ADD_CHILD = 2   /* child-added notification */
};

typedef enum jbr_recon_notify_ev_id_t jbr_recon_notify_ev_id_t;

/*
 * One reconciler notification event.
 *
 *   id    - which kind of event this is (see jbr_recon_notify_ev_id_t).
 *   index - only meaningful for "add" events.
 *           NOTE(review): presumably the index of the added child - confirm.
 *   list  - linkage for chaining events together.
 *           NOTE(review): presumably a queue of pending events; the list
 *           head is not visible in this header.
 */
typedef struct _jbr_recon_notify_ev_s {
    jbr_recon_notify_ev_id_t id;
    uint32_t index; /* in case of add */
    struct list_head list;
} jbr_recon_notify_ev_t;

/*
 * Per-translator private state for the JBR server xlator.
 *
 * Only the leader fields and term_buf are documented by the original
 * author; notes marked NOTE(review) are inferred from field names and
 * should be confirmed against the .c files that use this struct.
 */
typedef struct {
    /*
     * This is a hack to allow a non-leader to accept requests while the
     * leader is down, and it only works for n=2.  The way it works is that
     * "config_leader" indicates the state from our options (via init or
     * reconfigure) but "leader" is what the fop code actually looks at.  If
     * config_leader is true, then leader will *always* be true as well,
     * giving that brick precedence.  If config_leader is false, then
     * leader will only be true if there is no connection to the other
     * brick (tracked in jbr_notify).
     *
     * TBD: implement real leader election
     */
    gf_boolean_t config_leader;
    gf_boolean_t leader;
    /* NOTE(review): presumably the number of children currently up. */
    uint8_t up_children;
    /* NOTE(review): presumably the total number of children. */
    uint8_t n_children;
    char *vol_file;
    uint32_t current_term;
    uint32_t kid_state;
    /* NOTE(review): presumably dirty_lock guards dirty_fds - confirm. */
    gf_lock_t dirty_lock;
    struct list_head dirty_fds;
    /* NOTE(review): presumably index_lock guards index - confirm. */
    uint32_t index;
    gf_lock_t index_lock;
    double quorum_pct;
    /* NOTE(review): term_* look like state for reading a term file. */
    int term_fd;
    long term_total;
    long term_read;
    /*
     * This is a super-duper hack, but it will do for now.  The reason it's
     * a hack is that we pass this to dict_set_static_bin, so we don't have
     * to mess around with allocating and freeing it on every single IPC
     * request, but it's totally not thread-safe.  On the other hand, there
     * should only be one reconciliation thread running and calling these
     * functions at a time, so maybe that doesn't matter.
     *
     * TBD: re-evaluate how to manage this
     */
    char term_buf[CHANGELOG_ENTRY_SIZE];
    gf_boolean_t child_up; /* To maintain the state of *
                            * the translator */
} jbr_private_t;

/*
 * Per-request bookkeeping for a JBR operation.
 *
 * NOTE(review): the roles below are inferred from field names only; the
 * code that fills them in is not visible in this header.
 *   stub / qstub       - saved call stubs (presumably the operation itself
 *                        and its queued form).
 *   call_count         - presumably outstanding child calls.
 *   successful_acks    - presumably acknowledgements received so far.
 *   successful_op_ret  - presumably count of replies with a good op_ret.
 *   fd                 - file descriptor the request applies to, if any.
 *   qlinks             - list linkage (presumably into an inode queue).
 *   xdata              - dictionary carried with the request.
 */
typedef struct {
    call_stub_t *stub;
    call_stub_t *qstub;
    uint32_t call_count;
    uint32_t successful_acks;
    uint32_t successful_op_ret;
    fd_t *fd;
    struct list_head qlinks;
    dict_t *xdata;
} jbr_local_t;

/*
 * Identifier type for a changelog entry.  It should match whatever type
 * changelog returns on the pre-op, because that value is what we pass back
 * when we're ready for our post-op.
 */
typedef uint32_t log_id_t;

/*
 * One node in a list of dirty entries: list linkage plus a log_id_t.
 * NOTE(review): presumably chained onto jbr_fd_ctx_t.dirty_list, with id
 * being the changelog id saved at pre-op time - confirm against the users.
 */
typedef struct {
    struct list_head links;
    log_id_t id;
} jbr_dirty_list_t;

/*
 * Per-fd context kept by this translator.
 *
 *   fd         - the file descriptor this context belongs to.
 *   dirty_list - list head.
 *                NOTE(review): presumably of jbr_dirty_list_t nodes.
 *   fd_list    - list linkage.
 *                NOTE(review): presumably onto jbr_private_t.dirty_fds.
 */
typedef struct {
    fd_t *fd;
    struct list_head dirty_list;
    struct list_head fd_list;
} jbr_fd_ctx_t;

/*
 * Per-inode context kept by this translator: a lock plus two counted
 * queues.
 * NOTE(review): presumably "active"/"aqueue" track operations in progress
 * and "pending"/"pqueue" track operations waiting behind them, all guarded
 * by "lock" - confirm against the queueing code, which is not visible here.
 */
typedef struct {
    gf_lock_t lock;
    uint32_t active;
    struct list_head aqueue;
    uint32_t pending;
    struct list_head pqueue;
} jbr_inode_ctx_t;

/*
 * Start the reconciler for the given translator instance.
 * NOTE(review): the implementation is not visible in this header;
 * presumably it launches the script named by RECONCILER_PATH - confirm.
 */
void
jbr_start_reconciler(xlator_t *this);