diff options
| author | Kevin Vigor <kvigor@fb.com> | 2017-01-23 10:27:52 -0800 |
|---|---|---|
| committer | Kevin Vigor <kvigor@fb.com> | 2017-01-23 10:27:52 -0800 |
| commit | f80281c05e3f1c0ead5910586c7e81f047add623 (patch) | |
| tree | f58cac77acef73cfdf14d420bd10613ad3dc1850 | |
| parent | 6f6a21f1bbc5131e70b42d89a5ac8d8aa709ad3f (diff) | |
| parent | b21c51e6f0baa5145923637f54e79d221ca59cff (diff) | |
Merge remote-tracking branch 'origin/release-3.8' into merge-3.8
Change-Id: Ie6c73dee0b6798af4a69c43c0b03c3d02ff36aa2
24 files changed, 281 insertions, 137 deletions
diff --git a/configure.ac b/configure.ac index 739728085a6..4c2bb32fe23 100644 --- a/configure.ac +++ b/configure.ac @@ -740,7 +740,7 @@ AC_ARG_ENABLE([firewalld], [BUILD_FIREWALLD="${enableval}"], [BUILD_FIREWALLD="no"]) if test "x${BUILD_FIREWALLD}" = "xyes"; then - if !(which firewalld 1>/dev/null 2>&1) ; then + if !(test -d /usr/lib/firewalld/services 1>/dev/null 2>&1) ; then BUILD_FIREWALLD="no (firewalld not installed)" fi fi diff --git a/doc/release-notes/3.8.8.md b/doc/release-notes/3.8.8.md new file mode 100644 index 00000000000..282dee4cad0 --- /dev/null +++ b/doc/release-notes/3.8.8.md @@ -0,0 +1,48 @@ +# Release notes for Gluster 3.8.8 + +This is a bugfix release. The [Release Notes for 3.8.0](3.8.0.md), +[3.8.1](3.8.1.md), [3.8.2](3.8.2.md), [3.8.3](3.8.3.md), [3.8.4](3.8.4.md), +[3.8.5](3.8.5.md), [3.8.6](3.8.6.md) and [3.8.7](3.8.7.md) contain a listing of +all the new features that were added and bugs fixed in the GlusterFS 3.8 stable +release. + + +## Bugs addressed + +A total of 38 patches have been merged, addressing 35 bugs: + +- [#1375849](https://bugzilla.redhat.com/1375849): [RFE] enable sharding with virt profile - /var/lib/glusterd/groups/virt +- [#1378384](https://bugzilla.redhat.com/1378384): log level set in glfs_set_logging() does not work +- [#1378547](https://bugzilla.redhat.com/1378547): Asynchronous Unsplit-brain still causes Input/Output Error on system calls +- [#1389781](https://bugzilla.redhat.com/1389781): build: python on Debian-based dists use .../lib/python2.7/dist-packages instead of .../site-packages +- [#1394635](https://bugzilla.redhat.com/1394635): errors appear in brick and nfs logs and getting stale files on NFS clients +- [#1395510](https://bugzilla.redhat.com/1395510): Seeing error messages [snapview-client.c:283:gf_svc_lookup_cbk] and [dht-helper.c:1666ht_inode_ctx_time_update] (-->/usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x5d75c) +- [#1399423](https://bugzilla.redhat.com/1399423): GlusterFS client crashes during remove-brick operation +- [#1399432](https://bugzilla.redhat.com/1399432): A hard link is lost during rebalance+lookup +- [#1399468](https://bugzilla.redhat.com/1399468): Wrong value in Last Synced column during Hybrid Crawl +- [#1399915](https://bugzilla.redhat.com/1399915): [SAMBA-CIFS] : IO hungs in cifs mount while graph switch on & off +- [#1401029](https://bugzilla.redhat.com/1401029): OOM kill of nfs-ganesha on one node while fs-sanity test suite is executed. +- [#1401534](https://bugzilla.redhat.com/1401534): fuse mount point not accessible +- [#1402697](https://bugzilla.redhat.com/1402697): glusterfsd crashed while taking snapshot using scheduler +- [#1402728](https://bugzilla.redhat.com/1402728): Worker restarts on log-rsync-performance config update +- [#1403109](https://bugzilla.redhat.com/1403109): Crash of glusterd when using long username with geo-replication +- [#1404105](https://bugzilla.redhat.com/1404105): Incorrect incrementation of volinfo refcnt during volume start +- [#1404583](https://bugzilla.redhat.com/1404583): Upcall: Possible use after free when log level set to TRACE +- [#1405004](https://bugzilla.redhat.com/1405004): [Perf] : pcs cluster resources went into stopped state during Multithreaded perf tests on RHGS layered over RHEL 6 +- [#1405130](https://bugzilla.redhat.com/1405130): `gluster volume heal <vol-name> split-brain' does not heal if data/metadata/entry self-heal options are turned off +- [#1405450](https://bugzilla.redhat.com/1405450): tests/bugs/snapshot/bug-1316437.t test is causing spurious failure +- [#1405577](https://bugzilla.redhat.com/1405577): [GANESHA] failed to create directory of hostname of new node in var/lib/nfs/ganesha/ in already existing cluster nodes +- [#1405886](https://bugzilla.redhat.com/1405886): Fix potential leaks in INODELK cbk in protocol/client +- [#1405890](https://bugzilla.redhat.com/1405890): Fix spurious failure in bug-1402841.t-mt-dir-scan-race.t +- [#1405951](https://bugzilla.redhat.com/1405951): NFS-Ganesha:Volume reset for any option causes reset of ganesha enable option and bring down the ganesha services +- [#1406740](https://bugzilla.redhat.com/1406740): Fix spurious failure in tests/bugs/replicate/bug-1402730.t +- [#1408414](https://bugzilla.redhat.com/1408414): Remove-brick rebalance failed while rm -rf is in progress +- [#1408772](https://bugzilla.redhat.com/1408772): [Arbiter] After Killing a brick writes drastically slow down +- [#1408786](https://bugzilla.redhat.com/1408786): with granular-entry-self-heal enabled i see that there is a gfid mismatch and vm goes to paused state after migrating to another host +- [#1410073](https://bugzilla.redhat.com/1410073): Fix failure of split-brain-favorite-child-policy.t in CentOS7 +- [#1410369](https://bugzilla.redhat.com/1410369): Dict_t leak in dht_migration_complete_check_task and dht_rebalance_inprogress_task +- [#1410699](https://bugzilla.redhat.com/1410699): [geo-rep]: Config commands fail when the status is 'Created' +- [#1410708](https://bugzilla.redhat.com/1410708): glusterd/geo-rep: geo-rep config command leaks fd +- [#1410764](https://bugzilla.redhat.com/1410764): Remove-brick rebalance failed while rm -rf is in progress +- [#1411011](https://bugzilla.redhat.com/1411011): atime becomes zero when truncating file via ganesha (or gluster-NFS) +- [#1411613](https://bugzilla.redhat.com/1411613): Fix the place where graph switch event is logged diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 29bf00c60a9..29d07c530c0 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -226,7 +226,7 @@ BuildRequires: libattr-devel %endif %if (0%{?_with_firewalld:1}) -BuildRequires: firewalld +BuildRequires: firewalld-filesystem %endif Obsoletes: hekafs @@ -541,6 +541,10 @@ Requires(preun): /sbin/service Requires(preun): /sbin/chkconfig Requires(postun): /sbin/service %endif +%if (0%{?_with_firewalld:1}) +# we install firewalld rules, so we need to have the directory owned +Requires: firewalld-filesystem +%endif %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 ) Requires: rpcbind %else @@ -811,11 +815,7 @@ if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then fi %if (0%{?_with_firewalld:1}) -#reload service files if firewalld running -if $(systemctl is-active firewalld 1>/dev/null 2>&1); then - #firewalld-filesystem is not available for rhel7, so command used for reload. - firewall-cmd --reload 1>/dev/null 2>&1 -fi + %firewalld_reload %endif pidof -c -o %PPID -x glusterd &> /dev/null @@ -883,10 +883,7 @@ exit 0 %postun server /sbin/ldconfig %if (0%{?_with_firewalld:1}) -#reload service files if firewalld running -if $(systemctl is-active firewalld 1>/dev/null 2>&1); then - firewall-cmd --reload -fi + %firewalld_reload %endif exit 0 @@ -1195,6 +1192,9 @@ exit 0 %endif %changelog +* Fri Jan 6 2017 Niels de Vos <ndevos@redhat.com> +- use macro provided by firewalld-filesystem to reload firewalld + * Thu Dec 19 2016 Jiffin Tony Thottan <jhottan@redhat.com> - remove S31ganesha-reset.sh from hooks (#1405951) diff --git a/libglusterfs/src/lkowner.h b/libglusterfs/src/lkowner.h index b6a950f5e12..9712f176f30 100644 --- a/libglusterfs/src/lkowner.h +++ b/libglusterfs/src/lkowner.h @@ -84,4 +84,10 @@ out: return is_null; } +static inline void +lk_owner_copy (gf_lkowner_t *dst, gf_lkowner_t *src) +{ + dst->len = src->len; + memcpy(dst->data, src->data, src->len); +} #endif /* _LK_OWNER_H */ diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index 316d80452fb..7cf1c7757fe 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -309,7 +309,40 @@ sys_lseek (int fd, off_t offset, int whence) int sys_statvfs (const char *path, struct statvfs *buf) { - return statvfs (path, buf); + int ret; + + ret = statvfs (path, buf); +#ifdef __FreeBSD__ + /* FreeBSD doesn't return the expected vaule in buf->f_bsize. It + * contains the optimal I/O size instead of the file system block + * size. Gluster expects that this field contains the block size. + */ + if (ret == 0) { + buf->f_bsize = buf->f_frsize; + } +#endif /* __FreeBSD__ */ + + return ret; +} + + +int +sys_fstatvfs (int fd, struct statvfs *buf) +{ + int ret; + + ret = fstatvfs (fd, buf); +#ifdef __FreeBSD__ + /* FreeBSD doesn't return the expected vaule in buf->f_bsize. It + * contains the optimal I/O size instead of the file system block + * size. Gluster expects this field to contain the block size. + */ + if (ret == 0) { + buf->f_bsize = buf->f_frsize; + } +#endif /* __FreeBSD__ */ + + return ret; } diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/syscall.h index b549f6a1b3c..81884f88164 100644 --- a/libglusterfs/src/syscall.h +++ b/libglusterfs/src/syscall.h @@ -147,6 +147,9 @@ int sys_statvfs (const char *path, struct statvfs *buf); int +sys_fstatvfs (int fd, struct statvfs *buf); + +int sys_close (int fd); int diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 0f878a9be86..fb3318da36a 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -131,6 +131,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx) out: return ret; } + /* * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS: * @@ -192,10 +193,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, metadatamap_old = metadatamap = (val & 0x000000000000ffff); datamap_old = datamap = (val & 0x00000000ffff0000) >> 16; - /* Hard-code event to 0 since there is a failure and the inode - * needs to be refreshed anyway. - */ - event = 0; + event = (val & 0xffffffff00000000) >> 32; if (txn_type == AFR_DATA_TRANSACTION) tmp_map = datamap; @@ -228,6 +226,8 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, local->transaction.in_flight_sb = _gf_true; metadatamap |= (1 << index); } + if (metadatamap_old != metadatamap) + event = 0; break; case AFR_DATA_TRANSACTION: @@ -237,10 +237,12 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, local->transaction.in_flight_sb = _gf_true; datamap |= (1 << index); } + if (datamap_old != datamap) + event = 0; break; default: - break; + break; } val = ((uint64_t) metadatamap) | @@ -351,7 +353,7 @@ out: } int -__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this) +__afr_inode_event_gen_reset_small (inode_t *inode, xlator_t *this) { int ret = -1; uint16_t datamap = 0; @@ -452,7 +454,7 @@ out: } int -__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this) +__afr_inode_event_gen_reset (inode_t *inode, xlator_t *this) { afr_private_t *priv = NULL; int ret = -1; @@ -460,7 +462,7 @@ __afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this) priv = this->private; if (priv->child_count <= 16) - ret = __afr_inode_read_subvol_reset_small (inode, this); + ret = __afr_inode_event_gen_reset_small (inode, this); else ret = -1; @@ -593,7 +595,7 @@ out: int -afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this) +afr_inode_event_gen_reset (inode_t *inode, xlator_t *this) { int ret = -1; @@ -601,7 +603,7 @@ afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this) LOCK(&inode->lock); { - ret = __afr_inode_read_subvol_reset (inode, this); + ret = __afr_inode_event_gen_reset (inode, this); } UNLOCK(&inode->lock); out: @@ -2086,7 +2088,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) if (afr_replies_interpret (frame, this, local->inode, NULL)) { read_subvol = afr_read_subvol_decide (local->inode, this, &args); - afr_inode_read_subvol_reset (local->inode, this); + afr_inode_event_gen_reset (local->inode, this); goto cant_interpret; } else { read_subvol = afr_data_subvol_get (local->inode, this, diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 286a5392da6..8e483c382c4 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -122,12 +122,12 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) continue; if (local->replies[i].op_ret < 0) { if (local->inode) - afr_inode_read_subvol_reset (local->inode, this); + afr_inode_event_gen_reset (local->inode, this); if (local->parent) - afr_inode_read_subvol_reset (local->parent, + afr_inode_event_gen_reset (local->parent, this); if (local->parent2) - afr_inode_read_subvol_reset (local->parent2, + afr_inode_event_gen_reset (local->parent2, this); continue; } diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 926f7c4dc47..2390764bccd 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -48,17 +48,6 @@ afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this) return 0; } -#define AFR_READ_TXN_SET_ERROR_AND_GOTO(ret, errnum, index, label) \ - do { \ - local->op_ret = ret; \ - local->op_errno = errnum; \ - read_subvol = index; \ - gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,\ - "Failing %s on gfid %s: split-brain observed.",\ - gf_fop_list[local->op], uuid_utoa (inode->gfid));\ - goto label; \ - } while (0) - int afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) { @@ -72,19 +61,16 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) inode = local->inode; if (err) { - local->op_errno = -err; - local->op_ret = -1; read_subvol = -1; - gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, - "Failing %s on gfid %s: split-brain observed.", - gf_fop_list[local->op], uuid_utoa (inode->gfid)); goto readfn; } read_subvol = afr_read_subvol_select_by_policy (inode, this, local->readable, NULL); - if (read_subvol == -1) - AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); + if (read_subvol == -1) { + err = -EIO; + goto readfn; + } if (local->read_attempted[read_subvol]) { afr_read_txn_next_subvol (frame, this); @@ -99,6 +85,10 @@ readfn: if ((ret == 0) && spb_choice >= 0) read_subvol = spb_choice; } + + if (read_subvol == -1) { + AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN (-1, -err); + } local->readfn (frame, this, read_subvol); return 0; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 9cb735ea7fa..8178fc0d18b 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2255,15 +2255,9 @@ int afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) { afr_local_t *local = frame->local; - afr_private_t *priv = this->private; - int ret = 0; if (err) { - local->op_errno = -err; - local->op_ret = -1; - gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN, - "Failing %s on gfid %s: split-brain observed.", - gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); + AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, -err); goto fail; } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index bbfa309b868..aa19f1eeb37 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -54,6 +54,18 @@ typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this); #define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < len; __i++) if (a1[__i] != a2[__i]) { __cmp = 1; break;} __cmp;}) #define AFR_IS_ARBITER_BRICK(priv, index) ((priv->arbiter_count == 1) && (index == ARBITER_BRICK_INDEX)) +#define AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(ret, errnum) \ + do { \ + local->op_ret = ret; \ + local->op_errno = errnum; \ + if (local->op_errno == EIO) \ + gf_msg (this->name, GF_LOG_ERROR, local->op_errno, \ + AFR_MSG_SPLIT_BRAIN, "Failing %s on gfid %s: " \ + "split-brain observed.", \ + gf_fop_list[local->op], \ + uuid_utoa (local->inode->gfid)); \ + } while (0) + typedef enum { AFR_FAV_CHILD_NONE, AFR_FAV_CHILD_BY_SIZE, @@ -882,7 +894,7 @@ afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, int event_generation); int -afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this); +afr_inode_event_gen_reset (inode_t *inode, xlator_t *this); int afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this, @@ -906,10 +918,6 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a) int -afr_inode_ctx_reset_unreadable_subvol (inode_t *inode, xlator_t *this, - int subvol_idx, int txn_type); - -int afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, uuid_t gfid, afr_inode_refresh_cbk_t cbk); diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index e320109c796..ac0f0e186fa 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -849,6 +849,12 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dht_set_local_rebalance (this, local, NULL, prebuf, postbuf, xdata); + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + return 0; + } + /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { @@ -870,11 +876,6 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, } } - if (IS_DHT_MIGRATION_PHASE2 (postbuf)) { - ret = dht_rebalance_complete_check (this, frame); - if (!ret) - return 0; - } out: DHT_STRIP_PHASE1_FLAGS (postbuf); diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index a9ffd1d9fb5..d955ee411eb 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -724,6 +724,7 @@ dht_rename_cleanup (call_frame_t *frame) DHT_MARKER_DONT_ACCOUNT(xattr_new); + FRAME_SU_DO (frame, dht_local_t); STACK_WIND (frame, dht_rename_unlink_cbk, dst_hashed, dst_hashed->fops->unlink, &local->loc, 0, xattr_new); diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 2e6759a2803..2b4cec406a9 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -788,10 +788,10 @@ void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags) ec_lock_prepare_inode_internal(fop, loc, flags, NULL); } -void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, +void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base, uint32_t flags) { - loc_t tmp, *base = NULL; + loc_t tmp; int32_t err; if (fop->error != 0) { @@ -806,8 +806,9 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, } if ((flags & EC_INODE_SIZE) != 0) { - base = loc; flags ^= EC_INODE_SIZE; + } else { + base = NULL; } ec_lock_prepare_inode_internal(fop, &tmp, flags, base); @@ -1442,20 +1443,21 @@ gf_boolean_t ec_lock_acquire(ec_lock_link_t *link) { ec_lock_t *lock; ec_fop_data_t *fop; + gf_lkowner_t lk_owner; lock = link->lock; fop = link->fop; if (!lock->acquired) { - ec_owner_set(fop->frame, lock); + set_lk_owner_from_ptr(&lk_owner, lock); ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p", lock, lock->loc.inode); lock->flock.l_type = F_WRLCK; - ec_inodelk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked, - link, fop->xl->name, &lock->loc, F_SETLKW, &lock->flock, - NULL); + ec_inodelk(fop->frame, fop->xl, &lk_owner, -1, EC_MINIMUM_ALL, + ec_locked, link, fop->xl->name, &lock->loc, F_SETLKW, + &lock->flock, NULL); return _gf_false; } @@ -1760,6 +1762,7 @@ void ec_unlock_lock(ec_lock_link_t *link) { ec_lock_t *lock; ec_fop_data_t *fop; + gf_lkowner_t lk_owner; lock = link->lock; fop = link->fop; @@ -1767,13 +1770,13 @@ void ec_unlock_lock(ec_lock_link_t *link) ec_clear_inode_info(fop, lock->loc.inode); if ((lock->mask != 0) && lock->acquired) { - ec_owner_set(fop->frame, lock); + set_lk_owner_from_ptr(&lk_owner, lock); lock->flock.l_type = F_UNLCK; ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock, lock->loc.inode); - ec_inodelk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ONE, + ec_inodelk(fop->frame, fop->xl, &lk_owner, lock->mask, EC_MINIMUM_ONE, ec_unlocked, link, fop->xl->name, &lock->loc, F_SETLK, &lock->flock, NULL); } else { diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 8e724a81380..7c096db829c 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -89,7 +89,7 @@ gf_boolean_t ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro); void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags); -void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, +void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base, uint32_t flags); void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags); void ec_lock(ec_fop_data_t * fop); diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c index e181170650d..e068d77c827 100644 --- a/xlators/cluster/ec/src/ec-dir-write.c +++ b/xlators/cluster/ec/src/ec-dir-write.c @@ -177,7 +177,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) /* Fall through */ case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], + ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); @@ -355,9 +355,9 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[1], EC_UPDATE_DATA | - EC_UPDATE_META | - EC_INODE_SIZE); + ec_lock_prepare_parent_inode(fop, &fop->loc[1], &fop->loc[0], + EC_UPDATE_DATA | EC_UPDATE_META | + EC_INODE_SIZE); ec_lock(fop); return EC_STATE_DISPATCH; @@ -540,7 +540,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) /* Fall through */ case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], + ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); @@ -746,7 +746,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) /* Fall through */ case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], + ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); @@ -905,10 +905,10 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], EC_UPDATE_DATA | - EC_UPDATE_META | - EC_INODE_SIZE); - ec_lock_prepare_parent_inode(fop, &fop->loc[1], + ec_lock_prepare_parent_inode(fop, &fop->loc[0], &fop->loc[0], + EC_UPDATE_DATA | EC_UPDATE_META | + EC_INODE_SIZE); + ec_lock_prepare_parent_inode(fop, &fop->loc[1], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); @@ -1067,7 +1067,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], + ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); @@ -1213,7 +1213,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], + ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); @@ -1379,7 +1379,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_parent_inode(fop, &fop->loc[0], + ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL, EC_UPDATE_DATA | EC_UPDATE_META); ec_lock(fop); diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h index 8d938427a18..bbacedc0140 100644 --- a/xlators/cluster/ec/src/ec-fops.h +++ b/xlators/cluster/ec/src/ec-fops.h @@ -63,16 +63,16 @@ void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_fheal_cbk_t func, void *data, fd_t * fd, int32_t partial, dict_t *xdata); -void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_inodelk_cbk_t func, void *data, - const char * volume, loc_t * loc, int32_t cmd, - struct gf_flock * flock, dict_t * xdata); - -void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_finodelk_cbk_t func, void *data, - const char * volume, fd_t * fd, int32_t cmd, +void ec_inodelk (call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, + void *data, const char *volume, loc_t *loc, int32_t cmd, struct gf_flock * flock, dict_t * xdata); +void ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, + void *data, const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata); + void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_link_cbk_t func, void *data, loc_t * oldloc, loc_t * newloc, dict_t * xdata); diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 051d218331a..da08f6c9a75 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -177,14 +177,17 @@ void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc, if (fd != NULL) { - ec_finodelk(heal->fop->frame, heal->xl, heal->fop->mask, + ec_finodelk(heal->fop->frame, heal->xl, + &heal->fop->frame->root->lk_owner, heal->fop->mask, EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd, F_SETLKW, &flock, NULL); } else { - ec_inodelk(heal->fop->frame, heal->xl, heal->fop->mask, EC_MINIMUM_ALL, - cbk, heal, heal->xl->name, loc, F_SETLKW, &flock, NULL); + ec_inodelk(heal->fop->frame, heal->xl, + &heal->fop->frame->root->lk_owner, heal->fop->mask, + EC_MINIMUM_ALL, cbk, heal, heal->xl->name, loc, F_SETLKW, + &flock, NULL); } } diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 7cf8232353d..7df83126ae5 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -643,10 +643,9 @@ void ec_owner_set(call_frame_t * frame, void * owner) set_lk_owner_from_ptr(&frame->root->lk_owner, owner); } -void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner) +void ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner) { - frame->root->lk_owner.len = owner->len; - memcpy(frame->root->lk_owner.data, owner->data, owner->len); + lk_owner_copy (&frame->root->lk_owner, owner); } ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl) diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c index ed835f1aadc..bd525723ddf 100644 --- a/xlators/cluster/ec/src/ec-locks.c +++ b/xlators/cluster/ec/src/ec-locks.c @@ -608,12 +608,14 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state) flock.l_owner.len = 0; if (fop->id == GF_FOP_INODELK) { - ec_inodelk(fop->frame, fop->xl, mask, 1, + ec_inodelk(fop->frame, fop->xl, + &fop->frame->root->lk_owner, mask, 1, ec_lock_unlocked, NULL, fop->str[0], &fop->loc[0], F_SETLK, &flock, fop->xdata); } else { - ec_finodelk(fop->frame, fop->xl, mask, 1, + ec_finodelk(fop->frame, fop->xl, + &fop->frame->root->lk_owner, mask, 1, ec_lock_unlocked, NULL, fop->str[0], fop->fd, F_SETLK, &flock, fop->xdata); } @@ -692,10 +694,10 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state) } } -void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_inodelk_cbk_t func, void * data, - const char * volume, loc_t * loc, int32_t cmd, - struct gf_flock * flock, dict_t * xdata) +void ec_inodelk (call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, + void *data, const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { ec_cbk_t callback = { .inodelk = func }; ec_fop_data_t * fop = NULL; @@ -715,6 +717,7 @@ void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, } fop->int32 = cmd; + ec_owner_copy (fop->frame, owner); if (volume != NULL) { fop->str[0] = gf_strdup(volume); @@ -828,10 +831,10 @@ void ec_wind_finodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx) fop->xdata); } -void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, - int32_t minimum, fop_finodelk_cbk_t func, void * data, - const char * volume, fd_t * fd, int32_t cmd, - struct gf_flock * flock, dict_t * xdata) +void ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, + void *data, const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { ec_cbk_t callback = { .finodelk = func }; ec_fop_data_t * fop = NULL; @@ -853,6 +856,7 @@ void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, fop->use_fd = 1; fop->int32 = cmd; + ec_owner_copy (fop->frame, owner); if (volume != NULL) { fop->str[0] = gf_strdup(volume); diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 94d1241b364..b5e6bc08216 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -839,8 +839,8 @@ int32_t ec_gf_inodelk(call_frame_t * frame, xlator_t * this, if (flock->l_type == F_UNLCK) minimum = EC_MINIMUM_ONE; - ec_inodelk(frame, this, -1, minimum, default_inodelk_cbk, NULL, - volume, loc, cmd, flock, xdata); + ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum, + default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata); return 0; } @@ -852,8 +852,8 @@ int32_t ec_gf_finodelk(call_frame_t * frame, xlator_t * this, int32_t minimum = EC_MINIMUM_ALL; if (flock->l_type == F_UNLCK) minimum = EC_MINIMUM_ONE; - ec_finodelk(frame, this, -1, minimum, default_finodelk_cbk, NULL, - volume, fd, cmd, flock, xdata); + ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum, + default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata); return 0; } diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index eead33fbd55..525a6a6fbbc 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -4802,6 +4802,7 @@ fuse_graph_sync (xlator_t *this) fuse_private_t *priv = NULL; int need_first_lookup = 0; int ret = 0; + int new_graph_id = 0; xlator_t *old_subvol = NULL, *new_subvol = NULL; uint64_t winds_on_old_subvol = 0; @@ -4814,6 +4815,7 @@ fuse_graph_sync (xlator_t *this) old_subvol = priv->active_subvol; new_subvol = priv->active_subvol = priv->next_graph->top; + new_graph_id = priv->next_graph->id; priv->next_graph = NULL; need_first_lookup = 1; @@ -4832,6 +4834,8 @@ unlock: pthread_mutex_unlock (&priv->sync_mutex); if (need_first_lookup) { + gf_log ("fuse", GF_LOG_INFO, "switched to graph %d", + new_graph_id); fuse_first_lookup (this); } @@ -5237,9 +5241,6 @@ fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph) prev_graph->top, NULL); } - gf_log ("fuse", GF_LOG_INFO, "switched to graph %d", - ((graph) ? graph->id : 0)); - return ret; unlock: pthread_mutex_unlock (&priv->sync_mutex); diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 5ab38890df3..79845316315 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -153,9 +153,8 @@ iot_worker (void *data) struct timespec sleep_till = {0, }; int ret = 0; int pri = -1; - char timeout = 0; - char bye = 0; struct timespec sleep = {0,}; + gf_boolean_t bye = _gf_false; conf = data; this = conf->this; @@ -169,6 +168,12 @@ iot_worker (void *data) pri = -1; } while (conf->queue_size == 0) { + if (conf->down) { + bye = _gf_true;/*Avoid sleep*/ + break; + } + + conf->sleep_count++; clock_gettime (CLOCK_REALTIME_COARSE, &sleep_till); sleep_till.tv_sec += conf->idle_time; @@ -179,48 +184,48 @@ iot_worker (void *data) &sleep_till); conf->sleep_count--; - if (ret == ETIMEDOUT) { - timeout = 1; + if (conf->down || ret == ETIMEDOUT) { + bye = _gf_true; break; } } - if (timeout) { - if (conf->curr_count > IOT_MIN_THREADS) { + if (bye) { + if (conf->down || + conf->curr_count > IOT_MIN_THREADS) { conf->curr_count--; - bye = 1; + if (conf->curr_count == 0) + pthread_cond_broadcast (&conf->cond); gf_msg_debug (conf->this->name, 0, - "timeout, terminated. conf->curr_count=%d", + "terminated. " + "conf->curr_count=%d", conf->curr_count); } else { - timeout = 0; + bye = _gf_false; } } - stub = __iot_dequeue (conf, &pri, &sleep); - if (!stub && (sleep.tv_sec || sleep.tv_nsec)) { - pthread_cond_timedwait(&conf->cond, - &conf->mutex, &sleep); - pthread_mutex_unlock(&conf->mutex); - continue; + if (!bye) { + stub = __iot_dequeue (conf, &pri, &sleep); + if (!stub && (sleep.tv_sec || sleep.tv_nsec)) { + pthread_cond_timedwait(&conf->cond, + &conf->mutex, + &sleep); + pthread_mutex_unlock(&conf->mutex); + continue; + } } } pthread_mutex_unlock (&conf->mutex); if (stub) /* guard against spurious wakeups */ call_resume (stub); + stub = NULL; if (bye) break; } - if (pri != -1) { - pthread_mutex_lock (&conf->mutex); - { - conf->ac_iot_count[pri]--; - } - pthread_mutex_unlock (&conf->mutex); - } return NULL; } @@ -975,6 +980,7 @@ init (xlator_t *this) "pthread_cond_init failed (%d)", ret); goto out; } + conf->cond_inited = _gf_true; if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -982,6 +988,7 @@ init (xlator_t *this) "pthread_mutex_init failed (%d)", ret); goto out; } + conf->mutex_inited = _gf_true; set_stack_size (conf); @@ -1039,12 +1046,50 @@ out: return ret; } +static void +iot_exit_threads (iot_conf_t *conf) +{ + pthread_mutex_lock (&conf->mutex); + { + conf->down = _gf_true; + /*Let all the threads know that xl is going down*/ + pthread_cond_broadcast (&conf->cond); + while (conf->curr_count)/*Wait for threads to exit*/ + pthread_cond_wait (&conf->cond, &conf->mutex); + } + pthread_mutex_unlock (&conf->mutex); +} + +int +notify (xlator_t *this, int32_t event, void *data, ...) +{ + iot_conf_t *conf = this->private; + + if (GF_EVENT_PARENT_DOWN == event) + iot_exit_threads (conf); + + default_notify (this, event, data); + + return 0; +} void fini (xlator_t *this) { iot_conf_t *conf = this->private; + if (!conf) + return; + + if (conf->mutex_inited && conf->cond_inited) + iot_exit_threads (conf); + + if (conf->cond_inited) + pthread_cond_destroy (&conf->cond); + + if (conf->mutex_inited) + pthread_mutex_destroy (&conf->mutex); + GF_FREE (conf); this->private = NULL; diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index e5c97f690a2..673e1967617 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -82,6 +82,9 @@ struct iot_conf { xlator_t *this; size_t stack_size; + gf_boolean_t down; /*PARENT_DOWN event is notified*/ + gf_boolean_t mutex_inited; + gf_boolean_t cond_inited; struct iot_least_throttle throttle; }; |
