summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/nsr-server/src
diff options
context:
space:
mode:
author: Raghavan P <rpichai@redhat.com> 2014-02-19 07:03:26 +0530
committer: Jeff Darcy <jdarcy@redhat.com> 2014-03-03 19:41:32 +0000
commit: c28972ea53cc7cdb91c7aac01754dd7f0b66e1a7 (patch)
tree: fc316e94c6494b282a1179bb97939909e5cbcba0 /xlators/cluster/nsr-server/src
parent: 3bbfebc8dc21c469d47b576069ae137aec4567c9 (diff)
changes to NSR reconciliation code to add error handling.
Description of changes added: 1) In the recon driver, check the return values of all glfs calls. 2) Make the driver send error values back to other drivers or to the main translator. 3) Let the leader retry on errors. Change-Id: I050003a819d2314c8fdfd111df465041c30ee6e3 Signed-off-by: Raghavan P <rpichai@redhat.com>
Diffstat (limited to 'xlators/cluster/nsr-server/src')
-rw-r--r-- xlators/cluster/nsr-server/src/recon_notify.c 43
1 file changed, 40 insertions, 3 deletions
diff --git a/xlators/cluster/nsr-server/src/recon_notify.c b/xlators/cluster/nsr-server/src/recon_notify.c
index 7a0de85b1..7397192ae 100644
--- a/xlators/cluster/nsr-server/src/recon_notify.c
+++ b/xlators/cluster/nsr-server/src/recon_notify.c
@@ -120,12 +120,49 @@ nsr_recon_set_leader (xlator_t *this)
// in the callback (once reconciliation is done),
// we will unfence the IOs.
// TBD - error handling later.
- glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET);
+ if (glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "doing lseek failed\n");
+ return;
+ }
+
glusterfs_this_set(old);
gf_log (this->name, GF_LOG_INFO,
"Writing to local node to set leader");
- glfs_write(ctx->fd, &role,
- sizeof(role), 0);
+ do {
+ if (priv->leader != _gf_true) {
+ glusterfs_this_set(old);
+ gf_log (this->name, GF_LOG_ERROR, "no longer leader\n");
+ return;
+ }
+ if (glfs_write(ctx->fd, &role, sizeof(role), 0) == -1) {
+ if (errno == EAGAIN) {
+ // Wait for old reconciliation to bail out.
+ glusterfs_this_set(old);
+ gf_log (this->name, GF_LOG_ERROR,
+ "write failed with retry. retrying after some time\n");
+ sleep(5);
+ continue;
+ }
+ else{
+ glusterfs_this_set(old);
+ gf_log (this->name, GF_LOG_ERROR,
+ "doing write failed\n");
+ // This is because reconciliation has returned with error
+ // because some node has died in between.
+ // What should be done? Either we retry being leader
+ // or hook to CHILD_DOWN notification.
+ // Put that logic later. As of now we will just retry.
+ // This is easier.
+ sleep(5);
+ continue;
+ }
+ } else {
+ glusterfs_this_set(old);
+ gf_log (this->name, GF_LOG_INFO, "doing write with success\n");
+ break;
+ }
+ } while(1);
glusterfs_this_set(old);
gf_log (this->name, GF_LOG_INFO,
"glfs_write returned. unfencing IO\n");