diff options
| author | Raghavan P <rpichai@redhat.com> | 2014-02-19 07:03:26 +0530 |
|---|---|---|
| committer | Jeff Darcy <jdarcy@redhat.com> | 2014-03-03 19:41:32 +0000 |
| commit | c28972ea53cc7cdb91c7aac01754dd7f0b66e1a7 (patch) | |
| tree | fc316e94c6494b282a1179bb97939909e5cbcba0 /xlators/cluster/nsr-server/src | |
| parent | 3bbfebc8dc21c469d47b576069ae137aec4567c9 (diff) | |
changes to NSR reconciliation code to add error handling.
Description of changes added:
1) In the recon driver, check the return values of all glfs calls.
2) Make the driver send error values back to other drivers or to the main translator.
3) Let the leader retry on errors.
Change-Id: I050003a819d2314c8fdfd111df465041c30ee6e3
Signed-off-by: Raghavan P <rpichai@redhat.com>
Diffstat (limited to 'xlators/cluster/nsr-server/src')
| -rw-r--r-- | xlators/cluster/nsr-server/src/recon_notify.c | 43 |
1 files changed, 40 insertions, 3 deletions
diff --git a/xlators/cluster/nsr-server/src/recon_notify.c b/xlators/cluster/nsr-server/src/recon_notify.c index 7a0de85b1..7397192ae 100644 --- a/xlators/cluster/nsr-server/src/recon_notify.c +++ b/xlators/cluster/nsr-server/src/recon_notify.c @@ -120,12 +120,49 @@ nsr_recon_set_leader (xlator_t *this) // in the callback (once reconciliation is done), // we will unfence the IOs. // TBD - error handling later. - glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET); + if (glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "doing lseek failed\n"); + return; + } + glusterfs_this_set(old); gf_log (this->name, GF_LOG_INFO, "Writing to local node to set leader"); - glfs_write(ctx->fd, &role, - sizeof(role), 0); + do { + if (priv->leader != _gf_true) { + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, "no longer leader\n"); + return; + } + if (glfs_write(ctx->fd, &role, sizeof(role), 0) == -1) { + if (errno == EAGAIN) { + // Wait for old reconciliation to bail out. + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, + "write failed with retry. retrying after some time\n"); + sleep(5); + continue; + } + else{ + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, + "doing write failed\n"); + // This is because reconciliation has returned with error + // because some node has died in between. + // What should be done? Either we retry being leader + // or hook to CHILD_DOWN notification. + // Put that logic later. As of now we will just retry. + // This is easier. + sleep(5); + continue; + } + } else { + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_INFO, "doing write with success\n"); + break; + } + } while(1); glusterfs_this_set(old); gf_log (this->name, GF_LOG_INFO, "glfs_write returned. unfencing IO\n"); |
