summaryrefslogtreecommitdiffstats
path: root/xlators/features/bit-rot/src/bitd/bit-rot.h
blob: 308f8c03a734353c0e90a2e8f284070ac683e44e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
/*
   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
   This file is part of GlusterFS.

   This file is licensed to you under your choice of the GNU Lesser
   General Public License, version 3 or any later version (LGPLv3 or
   later), or the GNU General Public License, version 2 (GPLv2), in all
   cases as published by the Free Software Foundation.
*/

#ifndef __BIT_ROT_H__
#define __BIT_ROT_H__

#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
#include "xlator.h"
#include "defaults.h"
#include "syncop.h"
#include "syncop-utils.h"
#include "changelog.h"
#include "timer-wheel.h"

#include "bit-rot-tbf.h"
#include "bit-rot-ssm.h"

#include "bit-rot-common.h"
#include "bit-rot-stub-mem-types.h"

#include <openssl/sha.h>

/**
 * Number of signer worker threads; this sizes the workers[] array in
 * struct br_obj_n_workers.
 *
 * TODO: make this configurable. As a best practice, set this to the
 * number of processor cores.
 */
#define BR_WORKERS 4

/**
 * Throttle levels for the scrubber (stored in br_scrubber.throttle).
 * Every enumerator has an explicit value: VOID is the only negative one,
 * the remaining levels are consecutive starting at 0.
 *
 * NOTE(review): what each level means in terms of scrub rate is decided
 * by the code that consumes these values, which is not part of this
 * header -- confirm there before relying on the ordering.
 */
typedef enum scrub_throttle {
        BR_SCRUB_THROTTLE_VOID       = -1,
        BR_SCRUB_THROTTLE_LAZY       = 0,
        BR_SCRUB_THROTTLE_NORMAL     = 1,
        BR_SCRUB_THROTTLE_AGGRESSIVE = 2,
        BR_SCRUB_THROTTLE_STALLED    = 3,
} scrub_throttle_t;

/**
 * Scrub frequency choices (stored in br_scrubber.frequency). Numbering
 * starts at 1 and increases by one per enumerator.
 *
 * BR_FSSCRUB_FREQ_STALLED is translated to BR_SCRUB_EVENT_PAUSE by
 * _br_child_get_scrub_event(); every other value is translated to
 * BR_SCRUB_EVENT_SCHEDULE.
 */
typedef enum scrub_freq {
        BR_FSSCRUB_FREQ_HOURLY = 1,
        BR_FSSCRUB_FREQ_DAILY,
        BR_FSSCRUB_FREQ_WEEKLY,
        BR_FSSCRUB_FREQ_BIWEEKLY,
        BR_FSSCRUB_FREQ_MONTHLY,
        BR_FSSCRUB_FREQ_STALLED,
} scrub_freq_t;

/**
 * Number of bytes needed for a br_isignature_t followed by a payload of
 * @hl bytes plus one extra byte.
 *
 * The argument is parenthesized so that expressions containing operators
 * of lower precedence than '+' (e.g. a conditional expression) are
 * evaluated as a unit before being added.
 *
 * NOTE(review): the extra byte is presumably for a terminator -- confirm
 * against the code that fills the signature.
 */
#define signature_size(hl) (sizeof (br_isignature_t) + (hl) + 1)

/**
 * Per-subvolume filesystem scanner state; embedded in struct br_child as
 * the "fsscan" member.
 *
 * NOTE(review): the field notes marked "presumably" are inferred from
 * names only -- confirm against the scrubber implementation.
 */
struct br_scanfs {
        gf_lock_t entrylock;      /* presumably guards entries/queued/ready */

        pthread_mutex_t waitlock; /* mutex/condvar pair; presumably used to */
        pthread_cond_t  waitcond; /* wait for queued entries to be consumed */

        unsigned int     entries; /* entry counter */
        struct list_head queued;  /* list head: queued entries */
        struct list_head ready;   /* list head: ready entries */

        /* scheduler */
        uint32_t boot;
        gf_boolean_t kick;
        gf_boolean_t over;

        br_scrub_state_t state;   /* current scrub state */

        pthread_mutex_t wakelock; /* mutex/condvar pair; presumably used to */
        pthread_cond_t  wakecond; /* wake the scrubber for this subvolume */

        struct gf_tw_timer_list *timer;  /* timer-wheel timer */
};

/*
 * Four states are used to track child status; numbering starts at 1.
 * _br_is_child_connected() tests for CONNECTED and
 * _br_child_failed_conn() tests for CONNFAILED.
 */
typedef enum br_child_state {
        BR_CHILD_STATE_CONNECTED = 1,
        BR_CHILD_STATE_INITIALIZING,
        BR_CHILD_STATE_CONNFAILED,
        BR_CHILD_STATE_DISCONNECTED,
} br_child_state_t;

/**
 * Per-subvolume (child) state kept by the bit-rot xlator: connection
 * state, the client xlator and inode table for the child, the crawler
 * thread and the embedded filesystem scanner.
 */
struct br_child {
        gf_lock_t lock;               /* protects child state */
        char witnessed;               /* witnessed at least one successful
                                         connection; tested against 1 by
                                         _br_child_witnessed_connection() */
        br_child_state_t c_state;     /* current state of this child */

        char child_up;                /* Indicates whether this child is
                                         up or not */
        xlator_t *xl;                 /* client xlator corresponding to
                                         this child */
        inode_table_t *table;         /* inode table for this child */
        char brick_path[PATH_MAX];    /* brick export directory of this
                                         child */
        struct list_head list;        /* hook to attach to the list of
                                         UP children */
        xlator_t *this;               /* Bit rot xlator */

        pthread_t thread;             /* initial crawler for unsigned
                                         object(s) or scrub crawler */
        int threadrunning;            /* active thread */

        struct mem_pool *timer_pool;  /* timer-wheel's timer mem-pool */

        struct timeval tv;            /* NOTE(review): purpose not evident
                                         from this header -- confirm */

        struct br_scanfs fsscan;      /* per subvolume FS scanner */
};

typedef struct br_child br_child_t;

/**
 * Signing work queue together with the fixed set of BR_WORKERS threads
 * that drain it.
 */
struct br_obj_n_workers {
        struct list_head objects;         /* queue of objects expired from the
                                             timer wheel and ready to be picked
                                             up for signing */
        pthread_t workers[BR_WORKERS];    /* Threads which pick up the objects
                                             from the above queue and start
                                             signing each object */
};

/**
 * Scrubber state; embedded in struct br_private as the "fsscrub" member.
 *
 * NOTE(review): notes marked "presumably" are inferred from names only --
 * confirm against the scrubber implementation.
 */
struct br_scrubber {
        xlator_t *this;

        scrub_throttle_t throttle;   /* configured throttle level */

        /**
         * frequency of scanning for this subvolume. this should
         * normally be per-child, but since all children follow the
         * same frequency for a volume, this option ends up here
         * instead of br_child_t.
         */
        scrub_freq_t frequency;

        pthread_mutex_t mutex;       /* mutex/condvar pair; presumably */
        pthread_cond_t  cond;        /* guards the lists below */

        unsigned int nr_scrubbers;   /* presumably the length of "scrubbers" */
        struct list_head scrubbers;

        /**
         * list of "rotatable" subvolume(s) undergoing scrubbing
         */
        struct list_head scrublist;
};

/* shorthand for struct br_obj_n_workers (signing queue + worker threads) */
typedef struct br_obj_n_workers br_obj_n_workers_t;

/**
 * Private data of the bit-rot xlator: the set of children, the signing
 * machinery (timer wheel, object queue, workers, token bucket filter) and
 * the scrubber state used when iamscrubber is set.
 */
struct br_private {
        pthread_mutex_t lock;

        struct list_head bricks;          /* list of bricks from which events
                                             have been received */

        struct list_head signing;

        pthread_cond_t object_cond;       /* handling signing of objects */
        int child_count;
        br_child_t *children;             /* list of subvolumes */
        int up_children;

        pthread_cond_t cond;              /* handling CHILD_UP notifications */
        pthread_t thread;                 /* thread for connecting each UP
                                             child with changelog */

        struct tvec_base *timer_wheel;    /* timer wheel where the objects which
                                             changelog has sent sits and waits
                                             for expiry */
        br_obj_n_workers_t *obj_queue;    /* place holder for all the objects
                                             that are expired from timer wheel
                                             and ready to be picked up for
                                             signing and the workers which sign
                                             the objects */
        uint32_t expiry_time;              /* objects "wait" time */

        br_tbf_t *tbf;                    /* token bucket filter */

        gf_boolean_t iamscrubber;         /* function as a fs scrubber */

        struct br_scrubber fsscrub;       /* scrubbers for this subvolume */
};

typedef struct br_private br_private_t;

/**
 * One object (identified by gfid) that is waiting to be signed, together
 * with the subvolume it belongs to and the queue hook used once it has
 * expired from the timer wheel.
 */
struct br_object {
        xlator_t *this;

        uuid_t gfid;

        unsigned long signedversion;    /* version against which this object
                                           will be signed */
        br_child_t *child;              /* object's subvolume */

        int sign_info;

        struct list_head list;          /* hook to add to the queue once the
                                           object is expired from timer wheel */
        void *data;
};

typedef struct br_object br_object_t;
/*
 * Function type (not a pointer type) taking the xlator and a child and
 * returning int32_t.
 * NOTE(review): presumably the signature of scrub state-machine handlers --
 * confirm against bit-rot-ssm.
 */
typedef int32_t (br_scrub_ssm_call) (xlator_t *, br_child_t *);

/*
 * Functions defined outside this header. Parameters are unnamed here, so
 * the notes below are limited to what the types show.
 * NOTE(review): confirm argument meaning and return conventions against
 * the definitions.
 */

/* log about an object identified by its gfid (uuid_t argument) */
void
br_log_object (xlator_t *, char *, uuid_t, int32_t);

/* log about an object identified by a path (const char * argument) */
void
br_log_object_path (xlator_t *, char *, const char *, int32_t);

/* presumably writes the object's checksum into the unsigned char buffer */
int32_t
br_calculate_obj_checksum (unsigned char *,
                           br_child_t *, fd_t *, struct iatt *);

/* presumably fills a loc_t for a directory entry relative to another loc_t */
int32_t
br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);

/* returns a boolean; presumably true when the file is marked bad */
gf_boolean_t
bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *);

/**
 * Store @state as the current connection state of @child.
 *
 * No locking is done here.
 * NOTE(review): callers presumably hold child->lock -- confirm.
 */
static inline void
_br_set_child_state (br_child_t *child, br_child_state_t state)
{
        br_child_state_t *slot = &child->c_state;

        *slot = state;
}

/**
 * Tell whether @child is currently in BR_CHILD_STATE_CONNECTED.
 *
 * Returns 1 when it is, 0 for any other state. No locking is done here.
 */
static inline int
_br_is_child_connected (br_child_t *child)
{
        if (child->c_state != BR_CHILD_STATE_CONNECTED)
                return 0;

        return 1;
}

/**
 * Tell whether @child is currently in BR_CHILD_STATE_CONNFAILED.
 *
 * Returns 1 when it is, 0 for any other state. No locking is done here.
 */
static inline int
_br_child_failed_conn (br_child_t *child)
{
        if (child->c_state != BR_CHILD_STATE_CONNFAILED)
                return 0;

        return 1;
}

/**
 * Tell whether @child has its "witnessed" flag set to exactly 1.
 *
 * Returns 1 in that case and 0 for every other flag value.
 */
static inline int
_br_child_witnessed_connection (br_child_t *child)
{
        const char seen = child->witnessed;

        return (seen == 1) ? 1 : 0;
}

/**
 * scrub state: store @state in the filesystem scanner embedded in @child
 * (child->fsscan.state). No locking is done here.
 */
static inline void
_br_child_set_scrub_state (br_child_t *child, br_scrub_state_t state)
{
        child->fsscan.state = state;
}

/**
 * Derive the scrub event from the frequency configured in @fsscrub.
 *
 * Returns BR_SCRUB_EVENT_PAUSE when the frequency is
 * BR_FSSCRUB_FREQ_STALLED, and BR_SCRUB_EVENT_SCHEDULE for every other
 * frequency.
 */
static inline br_scrub_event_t
_br_child_get_scrub_event (struct br_scrubber *fsscrub)
{
        if (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED)
                return BR_SCRUB_EVENT_PAUSE;

        return BR_SCRUB_EVENT_SCHEDULE;
}

#endif /* __BIT_ROT_H__ */