/*
 * You can put anything here - it doesn't even have to be a comment - and it
 * will be ignored until we reach the first template-name comment.
 */

// template-name read-fop
$TYPE$
nsr_$NAME$ (call_frame_t *frame, xlator_t *this,
            $ARGS_LONG$)
{
        nsr_private_t   *priv = this->private;
        gf_boolean_t    in_recon = _gf_false;
        int32_t         recon_term, recon_index;

        // allow reads during reconciliation
        // TBD: allow "dirty" reads on non-leaders
        if (xdata &&
            (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) &&
            (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) {
                in_recon = _gf_true;
        }

        if ((!priv->leader) && (in_recon == _gf_false)) {
                goto err;
        }

        STACK_WIND (frame, default_$NAME$_cbk,
                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$,
                    $ARGS_SHORT$);
        return 0;

err:
        STACK_UNWIND_STRICT ($NAME$, frame, -1, EREMOTE, $DEFAULTS$);
        return 0;
}

// template-name read-dispatch
/* No "dispatch" function needed for $NAME$ */

// template-name read-fan-in
/* No "fan-in" function needed for $NAME$ */

// template-name read-continue
/* No "continue" function needed for $NAME$ */

// template-name read-complete
/* No "complete" function needed for $NAME$ */

// template-name write-fop
$TYPE$
nsr_$NAME$ (call_frame_t *frame, xlator_t *this,
            $ARGS_LONG$)
{
        nsr_local_t     *local = NULL;
        nsr_private_t   *priv = this->private;
        int             op_errno = ENOMEM;
        int             from_leader;
        int             from_recon;
        uint32_t        ti = 0;
        double          must_be_up;
        double          are_up;

        /*
         * Our first goal here is to avoid "split brain surprise" for users
         * who specify exactly 50% with two- or three-way replication.  That
         * means either a more-than check against half the total replicas or
         * an at-least check against half of our peers (one less).  Of the
         * two, only an at-least check supports the intuitive use of 100% to
         * mean all replicas must be present, because "more than 100%" will
         * never succeed regardless of which count we use.  This leaves us
         * with a slightly non-traditional definition of quorum ("at least X%
         * of peers not including ourselves") but one that's useful enough to
         * be worth it.
         *
         * Note that n_children and up_children *do* include the local
         * subvolume, so we need to subtract one in each case.
         */
        must_be_up = ((double)(priv->n_children - 1)) * priv->quorum_pct;
        are_up = ((double)(priv->up_children - 1)) * 100.0;
        if (are_up < must_be_up) {
                /* Emulate the AFR client-side-quorum behavior. */
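                /*
                 * Worked example (illustrative numbers, not from the code):
                 * with three-way replication, n_children is 3, so there are
                 * two peers.  At quorum_pct = 100, must_be_up = 2 * 100 =
                 * 200.  If all three children are up, are_up = 2 * 100.0 =
                 * 200 and the write proceeds; if one peer is down, are_up =
                 * 100 < 200 and we fail with EROFS.  A more-than check
                 * against total replicas could never satisfy 100%.
                 */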
                op_errno = EROFS;
                goto err;
        }

        local = mem_get0(this->local_pool);
        if (!local) {
                goto err;
        }
#if defined(NSR_CG_NEED_FD)
        local->fd = fd_ref(fd);
#else
        local->fd = NULL;
#endif
        INIT_LIST_HEAD(&local->qlinks);
        frame->local = local;

        if (xdata) {
                from_leader = !!dict_get(xdata, NSR_TERM_XATTR);
                from_recon = !!dict_get(xdata, RECON_TERM_XATTR) &&
                             !!dict_get(xdata, RECON_INDEX_XATTR);
        } else {
                from_leader = from_recon = _gf_false;
        }

        // follower/recon path
        // just send it to local node
        if (from_leader || from_recon) {
                atomic_inc(&priv->ops_in_flight);
                STACK_WIND (frame, nsr_$NAME$_complete,
                            FIRST_CHILD(this),
                            FIRST_CHILD(this)->fops->$NAME$,
                            $ARGS_SHORT$);
                return 0;
        }

        if (!priv->leader /* || priv->fence_io */) {
                op_errno = EREMOTE;
                goto err;
        }

        if (!xdata) {
                xdata = dict_new();
                if (!xdata) {
                        gf_log (this->name, GF_LOG_ERROR,
                                "failed to allocate xdata");
                        goto err;
                }
        }

        if (dict_set_int32(xdata, NSR_TERM_XATTR, priv->current_term) != 0) {
                gf_log (this->name, GF_LOG_ERROR, "failed to set nsr-term");
                goto err;
        }

        LOCK(&priv->index_lock);
        ti = ++(priv->index);
        UNLOCK(&priv->index_lock);

        if (dict_set_int32(xdata, NSR_INDEX_XATTR, ti) != 0) {
                gf_log (this->name, GF_LOG_ERROR, "failed to set index");
                goto err;
        }

        local->stub = fop_$NAME$_stub (frame, nsr_$NAME$_continue,
                                       $ARGS_SHORT$);
        if (!local->stub) {
                goto err;
        }

#if defined(NSR_CG_QUEUE)
        nsr_inode_ctx_t *ictx = nsr_get_inode_ctx(this, fd->inode);
        if (!ictx) {
                op_errno = EIO;
                goto err;
        }

        LOCK(&ictx->lock);
        if (ictx->active) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "queuing request due to conflict");
                /*
                 * TBD: enqueue only for real conflict
                 *
                 * Currently we just act like all writes are in conflict
                 * with one another.  What we should really do is check the
                 * active/pending queues and defer only if there's a
                 * conflict there.
                 *
                 * It's important to check the pending queue because we
                 * might have an active request X which conflicts with a
                 * pending request Y, and this request Z might conflict with
                 * Y but not X.  If we checked only the active queue then Z
                 * could jump ahead of Y, which would be incorrect.
                 */
                local->qstub = fop_$NAME$_stub (frame, nsr_$NAME$_dispatch,
                                                $ARGS_SHORT$);
                if (!local->qstub) {
                        UNLOCK(&ictx->lock);
                        goto err;
                }
                list_add_tail(&local->qlinks, &ictx->pqueue);
                ++(ictx->pending);
                UNLOCK(&ictx->lock);
                return 0;
        } else {
                list_add_tail(&local->qlinks, &ictx->aqueue);
                ++(ictx->active);
        }
        UNLOCK(&ictx->lock);
#endif

        return nsr_$NAME$_dispatch (frame, this, $ARGS_SHORT$);

err:
        if (local) {
                if (local->stub) {
                        call_stub_destroy(local->stub);
                }
                if (local->qstub) {
                        call_stub_destroy(local->qstub);
                }
                if (local->fd) {
                        fd_unref(local->fd);
                }
                mem_put(local);
        }
        STACK_UNWIND_STRICT ($NAME$, frame, -1, op_errno, $DEFAULTS$);
        return 0;
}

// template-name write-dispatch
$TYPE$
nsr_$NAME$_dispatch (call_frame_t *frame, xlator_t *this,
                     $ARGS_LONG$)
{
        nsr_local_t     *local = frame->local;
        nsr_private_t   *priv = this->private;
        xlator_list_t   *trav;

        atomic_inc(&priv->ops_in_flight);

        /*
         * TBD: unblock pending request(s) if we fail after this point but
         * before we get to nsr_$NAME$_complete (where that code currently
         * resides).
         */
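        /*
         * Fan-out sketch: FIRST_CHILD(this) is the local subvolume, so the
         * loop below winds to every child except the first (n_children - 1
         * calls).  The local write happens only later, in
         * nsr_$NAME$_continue, once nsr_$NAME$_fan_in has counted all of
         * the follower replies.
         */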
        local->call_count = priv->n_children - 1;
        for (trav = this->children->next; trav; trav = trav->next) {
                STACK_WIND (frame, nsr_$NAME$_fan_in,
                            trav->xlator, trav->xlator->fops->$NAME$,
                            $ARGS_SHORT$);
        }

        // TBD: variable Issue count
        return 0;
}

// template-name write-fan-in
$TYPE$
nsr_$NAME$_fan_in (call_frame_t *frame, void *cookie, xlator_t *this,
                   int32_t op_ret, int32_t op_errno,
                   $ARGS_LONG$)
{
        nsr_local_t     *local = frame->local;
        uint8_t         call_count;

        gf_log (this->name, GF_LOG_TRACE, "op_ret = %d, op_errno = %d\n",
                op_ret, op_errno);

        LOCK(&frame->lock);
        call_count = --(local->call_count);
        UNLOCK(&frame->lock);

        // TBD: variable Completion count
        if (call_count == 0) {
                call_resume(local->stub);
        }

        return 0;
}

// template-name write-continue
$TYPE$
nsr_$NAME$_continue (call_frame_t *frame, xlator_t *this,
                     $ARGS_LONG$)
{
        STACK_WIND (frame, nsr_$NAME$_complete,
                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$,
                    $ARGS_SHORT$);
        return 0;
}

// template-name write-complete
$TYPE$
nsr_$NAME$_complete (call_frame_t *frame, void *cookie, xlator_t *this,
                     int32_t op_ret, int32_t op_errno,
                     $ARGS_LONG$)
{
        nsr_private_t   *priv = this->private;
#if defined(NSR_CG_NEED_FD)
        /*
         * The NSR_CG_QUEUE and NSR_CG_FSYNC blocks below also use local,
         * so the generator is assumed to define NSR_CG_NEED_FD whenever it
         * defines either of those.
         */
        nsr_local_t     *local = frame->local;
#endif

#if defined(NSR_CG_QUEUE)
        nsr_inode_ctx_t *ictx;
        nsr_local_t     *next;

        if (local->qlinks.next != &local->qlinks) {
                list_del(&local->qlinks);
                ictx = nsr_get_inode_ctx(this, local->fd->inode);
                if (ictx) {
                        LOCK(&ictx->lock);
                        if (ictx->pending) {
                                /*
                                 * TBD: dequeue *all* non-conflicting reqs
                                 *
                                 * With the stub implementation there can
                                 * only be one request active at a time
                                 * (zero here) so it's not an issue.  In a
                                 * real implementation there might still be
                                 * other active requests to check against,
                                 * and multiple pending requests that could
                                 * continue.
                                 */
                                gf_log (this->name, GF_LOG_DEBUG,
                                        "unblocking next request");
                                --(ictx->pending);
                                next = list_entry (ictx->pqueue.next,
                                                   nsr_local_t, qlinks);
                                list_del(&next->qlinks);
                                list_add_tail(&next->qlinks, &ictx->aqueue);
                                call_resume(next->qstub);
                        } else {
                                --(ictx->active);
                        }
                        UNLOCK(&ictx->lock);
                }
        }
#endif

#if defined(NSR_CG_FSYNC)
        nsr_mark_fd_dirty(this, local);
#endif

#if defined(NSR_CG_NEED_FD)
        fd_unref(local->fd);
#endif

        STACK_UNWIND_STRICT ($NAME$, frame, op_ret, op_errno, $ARGS_SHORT$);
        atomic_dec(&priv->ops_in_flight);
        return 0;
}
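
/*
 * For orientation, the generated write path chains the templates above
 * roughly as follows (a sketch of the flow, not generated code):
 *
 *   nsr_$NAME$ (write-fop)      quorum check, term/index xattrs, queuing
 *   -> nsr_$NAME$_dispatch      wind to every child except the first
 *   -> nsr_$NAME$_fan_in        count replies, resume the stub on the last
 *   -> nsr_$NAME$_continue      perform the local write via FIRST_CHILD
 *   -> nsr_$NAME$_complete      dequeue any blocked request, unwind
 */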