diff options
| author | M. Mohan Kumar <mohan@in.ibm.com> | 2013-11-09 14:51:53 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2013-11-10 21:25:49 -0800 | 
| commit | c8fef37c5d566c906728b5f6f27baaa9a8d2a20d (patch) | |
| tree | 03c833446bc73bfa3da6c621315b590c0d65c748 | |
| parent | d5335f9e40f6e9533f7812d153b9727bcc04aa4e (diff) | |
glusterfs: zerofill support
Add support for a new ZEROFILL fop. Zerofill writes zeroes to a file in
the specified range. This fop will be useful when a whole file needs to
be initialized with zeroes (for example, when provisioning zero-filled VM
disk images or when scrubbing VM disk images).
A client/application can issue this fop to zero out a range. The Gluster
server will zero out the required range of bytes, i.e. server-offloaded
zeroing. In the absence of this fop, the client/application has to
repeatedly issue write (zero) fops to the server, which is very
inefficient because of the overheads involved in RPC calls and
acknowledgements.
WRITESAME is a SCSI T10 command that takes a block of data as input and
writes the same data to other blocks; this write is handled completely
within the storage and hence is known as an offload. Linux now has
support for the SCSI WRITESAME command, which is exposed to the user in
the form of the BLKZEROOUT ioctl. The BD xlator can exploit the
BLKZEROOUT ioctl to implement this fop. Thus zeroing-out operations can
be completely offloaded to the storage device, making them highly
efficient.
The fop takes two arguments, offset and size. It zeroes out 'size' bytes
in an opened file, starting at position 'offset'.
This patch adds zerofill support to the following areas:
	- libglusterfs
	- io-stats
	- performance/md-cache,open-behind
	- quota
	- cluster/afr,dht,stripe
	- rpc/xdr
	- protocol/client,server
	- io-threads
	- marker
	- storage/posix
	- libgfapi
Client applications can exploit this fop by using glfs_zerofill, introduced in
libgfapi. FUSE support for this fop has not been added, as there is no system
call for this fop.
Changes from previous version 3:
* Removed redundant memory failure log messages
Changes from previous version 2:
* Rebased and fixed build error
Changes from previous version 1:
* Rebased for latest master
TODO :
     * Add zerofill support to trace xlator
     * Expose zerofill capability as part of gluster volume info
Here is a performance comparison of server-offloaded zerofill vs zeroing
out using repeated writes.
[root@llmvm02 remote]# time ./offloaded aakash-test log 20
real	3m34.155s
user	0m0.018s
sys	0m0.040s
[root@llmvm02 remote]# time ./manually aakash-test log 20
real	4m23.043s
user	0m2.197s
sys	0m14.457s
[root@llmvm02 remote]# time ./offloaded aakash-test log 25;
real	4m28.363s
user	0m0.021s
sys	0m0.025s
[root@llmvm02 remote]# time ./manually aakash-test log 25
real	5m34.278s
user	0m2.957s
sys	0m18.808s
The argument 'log' is the file to be used for logging, and the third
argument is the size in GB.
As we can see there is a performance improvement of around 20% with this
fop.
Change-Id: I081159f5f7edde0ddb78169fb4c21c776ec91a18
BUG: 1028673
Signed-off-by: Aakash Lal Das <aakash@linux.vnet.ibm.com>
Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Reviewed-on: http://review.gluster.org/5327
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
36 files changed, 1661 insertions, 16 deletions
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index 9070661b99e..7572a94f2ac 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -591,6 +591,9 @@ glfs_io_async_task (void *data)  	case GF_FOP_DISCARD:  		ret = glfs_discard (gio->glfd, gio->offset, gio->count);  		break; +        case GF_FOP_ZEROFILL: +                ret = glfs_zerofill(gio->glfd, gio->offset, gio->count); +                break;  	}  	return (int) ret; @@ -1865,6 +1868,38 @@ glfs_discard_async (struct glfs_fd *glfd, off_t offset, size_t len,  	return ret;  } +int +glfs_zerofill_async (struct glfs_fd *glfd, off_t offset, size_t len, +                      glfs_io_cbk fn, void *data) +{ +        struct glfs_io *gio  = NULL; +        int             ret  = 0; + +        gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t); +        if (!gio) { +                errno = ENOMEM; +                return -1; +        } + +        gio->op     = GF_FOP_ZEROFILL; +        gio->glfd   = glfd; +        gio->offset = offset; +        gio->count  = len; +        gio->fn     = fn; +        gio->data   = data; + +        ret = synctask_new (glfs_from_glfd (glfd)->ctx->env, +                            glfs_io_async_task, glfs_io_async_cbk, +                            NULL, gio); + +        if (ret) { +                GF_FREE (gio->iov); +                GF_FREE (gio); +        } + +        return ret; +} +  void  gf_dirent_to_dirent (gf_dirent_t *gf_dirent, struct dirent *dirent) @@ -2821,6 +2856,36 @@ out:  	return ret;  } +int +glfs_zerofill (struct glfs_fd *glfd, off_t offset, size_t len) +{ +        int               ret             = -1; +        xlator_t         *subvol          = NULL; +        fd_t             *fd              = NULL; + +        __glfs_entry_fd (glfd); + +        subvol = glfs_active_subvol (glfd->fs); +        if (!subvol) { +                errno = EIO; +                goto out; +        } + +        fd = glfs_resolve_fd (glfd->fs, subvol, glfd); +        if (!fd) { 
+                errno = EBADFD; +                goto out; +        } + +        ret = syncop_zerofill (subvol, fd, offset, len); +out: +        if (fd) +                fd_unref(fd); + +        glfs_subvol_done (glfd->fs, subvol); + +        return ret; +}  int  glfs_chdir (struct glfs *fs, const char *path) diff --git a/api/src/glfs.h b/api/src/glfs.h index c2fb26505d5..d179b87ba42 100644 --- a/api/src/glfs.h +++ b/api/src/glfs.h @@ -533,9 +533,15 @@ int glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len);  int glfs_discard(glfs_fd_t *fd, off_t offset, size_t len); +  int glfs_discard_async (glfs_fd_t *fd, off_t length, size_t lent,  			glfs_io_cbk fn, void *data); +int glfs_zerofill(glfs_fd_t *fd, off_t offset, size_t len); + +int glfs_zerofill_async (glfs_fd_t *fd, off_t length, size_t len, +                        glfs_io_cbk fn, void *data); +  char *glfs_getcwd (glfs_t *fs, char *buf, size_t size);  int glfs_chdir (glfs_t *fs, const char *path); diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c index 2f07a0074c5..ac79cf0711c 100644 --- a/libglusterfs/src/call-stub.c +++ b/libglusterfs/src/call-stub.c @@ -2241,6 +2241,62 @@ out:  } +call_stub_t * +fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn, +                     int32_t op_ret, int32_t op_errno, +                     struct iatt *statpre, struct iatt *statpost, +                     dict_t *xdata) +{ +        call_stub_t *stub = NULL; + +        GF_VALIDATE_OR_GOTO ("call-stub", frame, out); + +        stub = stub_new (frame, 0, GF_FOP_ZEROFILL); +        GF_VALIDATE_OR_GOTO ("call-stub", stub, out); + +        stub->fn_cbk.zerofill = fn; + +        stub->args_cbk.op_ret = op_ret; +        stub->args_cbk.op_errno = op_errno; + +        if (statpre) +                stub->args_cbk.prestat = *statpre; +        if (statpost) +                stub->args_cbk.poststat = *statpost; +        if (xdata) +                stub->args_cbk.xdata = dict_ref 
(xdata); +out: +        return stub; +} + +call_stub_t * +fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd, +                 off_t offset, size_t len, dict_t *xdata) +{ +        call_stub_t *stub = NULL; + +        GF_VALIDATE_OR_GOTO ("call-stub", frame, out); +        GF_VALIDATE_OR_GOTO ("call-stub", fn, out); + +        stub = stub_new (frame, 1, GF_FOP_ZEROFILL); +        GF_VALIDATE_OR_GOTO ("call-stub", stub, out); + +        stub->fn.zerofill = fn; + +        if (fd) +                stub->args.fd = fd_ref (fd); + +        stub->args.offset = offset; +        stub->args.size = len; + +        if (xdata) +                stub->args.xdata = dict_ref (xdata); +out: +        return stub; + +} + +  static void  call_resume_wind (call_stub_t *stub)  { @@ -2468,6 +2524,12 @@ call_resume_wind (call_stub_t *stub)  				 stub->args.fd, stub->args.offset,  				 stub->args.size, stub->args.xdata);  		break; +        case GF_FOP_ZEROFILL: +                stub->fn.zerofill(stub->frame, stub->frame->this, +                                 stub->args.fd, stub->args.offset, +                                 stub->args.size, stub->args.xdata); +                break; +          default:                  gf_log_callingfn ("call-stub", GF_LOG_ERROR,                                    "Invalid value of FOP (%d)", @@ -2670,6 +2732,11 @@ call_resume_unwind (call_stub_t *stub)  		STUB_UNWIND(stub, discard, &stub->args_cbk.prestat,  			    &stub->args_cbk.poststat, stub->args_cbk.xdata);  		break; +        case GF_FOP_ZEROFILL: +                STUB_UNWIND(stub, zerofill, &stub->args_cbk.prestat, +                            &stub->args_cbk.poststat, stub->args_cbk.xdata); +                break; +          default:                  gf_log_callingfn ("call-stub", GF_LOG_ERROR,                                    "Invalid value of FOP (%d)", diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h index f0872b1219a..45bef80443f 100644 --- 
a/libglusterfs/src/call-stub.h +++ b/libglusterfs/src/call-stub.h @@ -71,6 +71,7 @@ typedef struct {  		fop_fsetattr_t fsetattr;  		fop_fallocate_t fallocate;  		fop_discard_t discard; +                fop_zerofill_t zerofill;  	} fn;  	union { @@ -117,6 +118,7 @@ typedef struct {  		fop_fsetattr_cbk_t fsetattr;  		fop_fallocate_cbk_t fallocate;  		fop_discard_cbk_t discard; +                fop_zerofill_cbk_t zerofill;  	} fn_cbk;  	struct { @@ -745,6 +747,20 @@ fop_discard_cbk_stub(call_frame_t *frame,                       struct iatt *statpre, struct iatt *statpost,                       dict_t *xdata); +call_stub_t * +fop_zerofill_stub(call_frame_t *frame, +                 fop_zerofill_t fn, +                 fd_t *fd, +                 off_t offset, +                 size_t len, dict_t *xdata); + +call_stub_t * +fop_zerofill_cbk_stub(call_frame_t *frame, +                     fop_zerofill_cbk_t fn, +                     int32_t op_ret, int32_t op_errno, +                     struct iatt *statpre, struct iatt *statpost, +                     dict_t *xdata); +  void call_resume (call_stub_t *stub);  void call_stub_destroy (call_stub_t *stub);  void call_unwind_error (call_stub_t *stub, int op_ret, int op_errno); diff --git a/libglusterfs/src/defaults.c b/libglusterfs/src/defaults.c index a3c8d97f112..2ebb251504c 100644 --- a/libglusterfs/src/defaults.c +++ b/libglusterfs/src/defaults.c @@ -473,6 +473,17 @@ default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,  }  int32_t +default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                    int32_t op_ret, int32_t op_errno, struct iatt *pre, +                    struct iatt *post, dict_t *xdata) +{ +        STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, +                           post, xdata); +        return 0; +} + + +int32_t  default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                       int32_t op_ret, int32_t op_errno, 
char *spec_data)  { @@ -900,6 +911,17 @@ default_discard_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,          return 0;  } +int32_t +default_zerofill_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, +                       off_t offset, size_t len, dict_t *xdata) +{ +        STACK_WIND(frame, default_zerofill_cbk, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, +                   xdata); +        return 0; +} + +  /* FOPS */  int32_t @@ -1325,6 +1347,17 @@ default_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,  }  int32_t +default_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, +                off_t offset, size_t len, dict_t *xdata) +{ +        STACK_WIND_TAIL(frame, FIRST_CHILD(this), +                        FIRST_CHILD(this)->fops->zerofill, fd, offset, len, +                        xdata); +        return 0; +} + + +int32_t  default_forget (xlator_t *this, inode_t *inode)  {          gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not " diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/defaults.h index f3cbb3a4bb7..0747027bc35 100644 --- a/libglusterfs/src/defaults.h +++ b/libglusterfs/src/defaults.h @@ -255,6 +255,13 @@ int32_t default_discard(call_frame_t *frame,  			off_t offset,  			size_t len, dict_t *xdata); +int32_t default_zerofill(call_frame_t *frame, +                        xlator_t *this, +                        fd_t *fd, +                        off_t offset, +                        size_t len, dict_t *xdata); + +  /* Resume */  int32_t default_getspec_resume (call_frame_t *frame,                                  xlator_t *this, @@ -477,6 +484,13 @@ int32_t default_discard_resume(call_frame_t *frame,  			       off_t offset,  			       size_t len, dict_t *xdata); +int32_t default_zerofill_resume(call_frame_t *frame, +                               xlator_t *this, +                               fd_t *fd, +                               off_t offset, 
+                               size_t len, dict_t *xdata); + +  /* _cbk */  int32_t @@ -695,6 +709,10 @@ int32_t default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,  			    int32_t op_ret, int32_t op_errno, struct iatt *pre,  			    struct iatt *post, dict_t *xdata); +int32_t default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                            int32_t op_ret, int32_t op_errno, struct iatt *pre, +                            struct iatt *post, dict_t *xdata); +  int32_t  default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                       int32_t op_ret, int32_t op_errno, char *spec_data); diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 1e4cbf0edeb..259c5c885f7 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -70,6 +70,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = {          [GF_FOP_FREMOVEXATTR]= "FREMOVEXATTR",  	[GF_FOP_FALLOCATE]   = "FALLOCATE",  	[GF_FOP_DISCARD]     = "DISCARD", +        [GF_FOP_ZEROFILL]     = "ZEROFILL",  };  /* THIS */ diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index b38d6d53e2c..474631e3ff9 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -212,6 +212,7 @@ typedef enum {          GF_FOP_FREMOVEXATTR,  	GF_FOP_FALLOCATE,  	GF_FOP_DISCARD, +        GF_FOP_ZEROFILL,          GF_FOP_MAXVALUE,  } glusterfs_fop_t; diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 866dca50ea4..c1620bb7088 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -2173,6 +2173,35 @@ syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len)          return args.op_ret;  } +int +syncop_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                    int op_ret, int op_errno, struct iatt *prebuf, +                    struct iatt *postbuf, dict_t *xdata) +{ +        struct syncargs *args = NULL; + +        
args = cookie; + +        args->op_ret   = op_ret; +        args->op_errno = op_errno; + +        __wake (args); + +        return 0; +} + +int +syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, size_t len) +{ +        struct syncargs args = {0, }; + +        SYNCOP (subvol, (&args), syncop_zerofill_cbk, subvol->fops->zerofill, +                fd, offset, len, NULL); + +        errno = args.op_errno; +        return args.op_ret; +} +  int  syncop_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h index 0fc9e14ba0f..f790981f0bc 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/syncop.h @@ -403,6 +403,8 @@ int syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset  		     size_t len);  int syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len); +int syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, size_t len); +  int syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);  int syncop_lk (xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock); diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index d029475504a..483c588a9f1 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -81,6 +81,7 @@ fill_defaults (xlator_t *xl)          SET_DEFAULT_FOP (fsetattr);  	SET_DEFAULT_FOP (fallocate);  	SET_DEFAULT_FOP (discard); +        SET_DEFAULT_FOP (zerofill);          SET_DEFAULT_FOP (getspec); diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index d6296262a13..2f938c384fc 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -434,6 +434,14 @@ typedef int32_t (*fop_discard_cbk_t) (call_frame_t *frame,                                        struct iatt *preop_stbuf,                                        struct iatt *postop_stbuf, dict_t *xdata); +typedef int32_t (*fop_zerofill_cbk_t) (call_frame_t *frame, +                                      void 
*cookie, +                                      xlator_t *this, +                                      int32_t op_ret, +                                      int32_t op_errno, +                                      struct iatt *preop_stbuf, +                                      struct iatt *postop_stbuf, dict_t *xdata); +  typedef int32_t (*fop_lookup_t) (call_frame_t *frame,                                   xlator_t *this,                                   loc_t *loc, @@ -665,6 +673,12 @@ typedef int32_t (*fop_discard_t) (call_frame_t *frame,  				  off_t offset,  				  size_t len,                                    dict_t *xdata); +typedef int32_t (*fop_zerofill_t) (call_frame_t *frame, +                                  xlator_t *this, +                                  fd_t *fd, +                                  off_t offset, +                                  size_t len, +                                  dict_t *xdata);  struct xlator_fops {          fop_lookup_t         lookup; @@ -711,6 +725,7 @@ struct xlator_fops {          fop_getspec_t        getspec;  	fop_fallocate_t	     fallocate;  	fop_discard_t	     discard; +        fop_zerofill_t       zerofill;          /* these entries are used for a typechecking hack in STACK_WIND _only_ */          fop_lookup_cbk_t         lookup_cbk; @@ -757,6 +772,7 @@ struct xlator_fops {          fop_getspec_cbk_t        getspec_cbk;  	fop_fallocate_cbk_t	 fallocate_cbk;  	fop_discard_cbk_t	 discard_cbk; +        fop_zerofill_cbk_t       zerofill_cbk;  };  typedef int32_t (*cbk_forget_t) (xlator_t *this, diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index cbd79bd9dbc..adec7b63849 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -58,6 +58,7 @@ enum gf_fop_procnum {          GFS3_OP_FREMOVEXATTR,  	GFS3_OP_FALLOCATE,  	GFS3_OP_DISCARD, +        GFS3_OP_ZEROFILL,          GFS3_OP_MAXVALUE,  } ; diff --git a/rpc/xdr/src/glusterfs3-xdr.c 
b/rpc/xdr/src/glusterfs3-xdr.c index 4e9791b2077..3205c551e5b 100644 --- a/rpc/xdr/src/glusterfs3-xdr.c +++ b/rpc/xdr/src/glusterfs3-xdr.c @@ -1585,6 +1585,47 @@ xdr_gfs3_discard_rsp (XDR *xdrs, gfs3_discard_rsp *objp)  }  bool_t +xdr_gfs3_zerofill_req (XDR *xdrs, gfs3_zerofill_req *objp) +{ +        register int32_t *buf; +        buf = NULL; + +        if (!xdr_opaque (xdrs, objp->gfid, 16)) +                return FALSE; +        if (!xdr_quad_t (xdrs, &objp->fd)) +                return FALSE; +        if (!xdr_u_quad_t (xdrs, &objp->offset)) +                return FALSE; +        if (!xdr_u_quad_t (xdrs, &objp->size)) +                return FALSE; +        if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, +             (u_int *) &objp->xdata.xdata_len, ~0)) +                return FALSE; +        return TRUE; +} + +bool_t +xdr_gfs3_zerofill_rsp (XDR *xdrs, gfs3_zerofill_rsp *objp) +{ +        register int32_t *buf; +        buf = NULL; + +        if (!xdr_int (xdrs, &objp->op_ret)) +                return FALSE; +        if (!xdr_int (xdrs, &objp->op_errno)) +                return FALSE; +        if (!xdr_gf_iatt (xdrs, &objp->statpre)) +                return FALSE; +        if (!xdr_gf_iatt (xdrs, &objp->statpost)) +                return FALSE; +        if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, +             (u_int *) &objp->xdata.xdata_len, ~0)) +                return FALSE; +        return TRUE; +} + + +bool_t  xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp)  {  	register int32_t *buf; diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h index 9e5d2e67bcb..13566e69447 100644 --- a/rpc/xdr/src/glusterfs3-xdr.h +++ b/rpc/xdr/src/glusterfs3-xdr.h @@ -936,6 +936,31 @@ struct gfs3_discard_rsp {  };  typedef struct gfs3_discard_rsp gfs3_discard_rsp; +struct gfs3_zerofill_req { +        char gfid[16]; +        quad_t fd; +        u_quad_t offset; +        u_quad_t size; +        struct { +                
u_int xdata_len; +                char *xdata_val; +        } xdata; +}; +typedef struct gfs3_zerofill_req gfs3_zerofill_req; + +struct gfs3_zerofill_rsp { +        int op_ret; +        int op_errno; +        struct gf_iatt statpre; +        struct gf_iatt statpost; +        struct { +                u_int xdata_len; +                char *xdata_val; +        } xdata; +}; +typedef struct gfs3_zerofill_rsp gfs3_zerofill_rsp; + +  struct gfs3_rchecksum_req {  	quad_t fd;  	u_quad_t offset; @@ -1235,6 +1260,8 @@ extern  bool_t xdr_gfs3_fallocate_req (XDR *, gfs3_fallocate_req*);  extern  bool_t xdr_gfs3_fallocate_rsp (XDR *, gfs3_fallocate_rsp*);  extern  bool_t xdr_gfs3_discard_req (XDR *, gfs3_discard_req*);  extern  bool_t xdr_gfs3_discard_rsp (XDR *, gfs3_discard_rsp*); +extern  bool_t xdr_gfs3_zerofill_req (XDR *, gfs3_zerofill_req*); +extern  bool_t xdr_gfs3_zerofill_rsp (XDR *, gfs3_zerofill_rsp*);  extern  bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*);  extern  bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*);  extern  bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*); @@ -1333,6 +1360,8 @@ extern bool_t xdr_gfs3_fallocate_req ();  extern bool_t xdr_gfs3_fallocate_rsp ();  extern bool_t xdr_gfs3_discard_req ();  extern bool_t xdr_gfs3_discard_rsp (); +extern bool_t xdr_gfs3_zerofill_req (); +extern bool_t xdr_gfs3_zerofill_rsp ();  extern bool_t xdr_gfs3_rchecksum_req ();  extern bool_t xdr_gfs3_rchecksum_rsp ();  extern bool_t xdr_gf_setvolume_req (); diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x index e2b086b1d1a..1edbda3ada9 100644 --- a/rpc/xdr/src/glusterfs3-xdr.x +++ b/rpc/xdr/src/glusterfs3-xdr.x @@ -599,6 +599,23 @@ struct gfs3_fstat_req {          opaque   xdata<>; /* Extra data */  }  ; + struct gfs3_zerofill_req { +        opaque          gfid[16]; +        hyper           fd; +        unsigned hyper  offset; +        unsigned hyper  size; +        opaque   xdata<>; +}  ; + + struct 
gfs3_zerofill_rsp { +        int    op_ret; +        int    op_errno; +        struct gf_iatt statpre; +        struct gf_iatt statpost; +        opaque   xdata<>; +}  ; + +   struct gfs3_rchecksum_req {          hyper   fd;          unsigned hyper  offset; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 5f2d3096f66..c1ec69a5505 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -2609,3 +2609,253 @@ out:          return 0;  } + + +/* {{{ zerofill */ + +static int +afr_zerofill_unwind (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t     *local            = NULL; +        call_frame_t    *main_frame       = NULL; + +        local = frame->local; + +        LOCK (&frame->lock); +        { +                if (local->transaction.main_frame) { +                        main_frame = local->transaction.main_frame; +                } +                local->transaction.main_frame = NULL; +        } +        UNLOCK (&frame->lock); + +        if (main_frame) { +                AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret, +                                  local->op_errno, +                                  &local->cont.zerofill.prebuf, +                                  &local->cont.zerofill.postbuf, +                                  NULL); +        } +        return 0; +} + +static int +afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                     struct iatt *postbuf, dict_t *xdata) +{ +        afr_local_t       *local             = NULL; +        afr_private_t     *priv              = NULL; +        int                child_index       = (long) cookie; +        int                call_count        = -1; +        int                need_unwind       = 0; +        int                read_child        = 0; + +        local = 
frame->local; +        priv  = this->private; + +        read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); + +        LOCK (&frame->lock); +        { +                if (child_index == read_child) { +                        local->read_child_returned = _gf_true; +                } + +                if (afr_fop_failed (op_ret, op_errno)) { +                        afr_transaction_fop_failed (frame, this, child_index); +                } + +                if (op_ret != -1) { +                        if (local->success_count == 0) { +                                local->op_ret = op_ret; +                                local->cont.zerofill.prebuf  = *prebuf; +                                local->cont.zerofill.postbuf = *postbuf; +                        } + +                        if (child_index == read_child) { +                                local->cont.zerofill.prebuf  = *prebuf; +                                local->cont.zerofill.postbuf = *postbuf; +                        } + +                        local->success_count++; + +                        if ((local->success_count >= priv->wait_count) +                            && local->read_child_returned) { +                                need_unwind = 1; +                        } +                } +                local->op_errno = op_errno; +        } +        UNLOCK (&frame->lock); + +        if (need_unwind) { +                local->transaction.unwind (frame, this); +        } +        call_count = afr_frame_return (frame); + +        if (call_count == 0) { +                local->transaction.resume (frame, this); +        } + +        return 0; +} + +static int +afr_zerofill_wind (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t    *local         = NULL; +        afr_private_t  *priv          = NULL; +        int             call_count    = -1; +        int             i             = 0; + +        local = frame->local; +        priv = this->private; + +        
call_count = afr_pre_op_done_children_count (local->transaction.pre_op, +                                                     priv->child_count); + +        if (call_count == 0) { +                local->transaction.resume (frame, this); +                return 0; +        } + +        local->call_count = call_count; + +        for (i = 0; i < priv->child_count; i++) { +                if (local->transaction.pre_op[i]) { +                        STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, +                                           (void *) (long) i, +                                           priv->children[i], +                                           priv->children[i]->fops->zerofill, +                                           local->fd, +                                           local->cont.zerofill.offset, +                                           local->cont.zerofill.len, +                                           NULL); + +                        if (!--call_count) +                                break; +                } +        } + +        return 0; +} + +static int +afr_zerofill_done (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t *local = NULL; + +        local = frame->local; + +        local->transaction.unwind (frame, this); + +        AFR_STACK_DESTROY (frame); + +        return 0; +} + +static int +afr_do_zerofill(call_frame_t *frame, xlator_t *this) +{ +        call_frame_t  *transaction_frame = NULL; +        afr_local_t   *local             = NULL; +        int            op_ret            = -1; +        int            op_errno          = 0; + +        local = frame->local; + +        transaction_frame = copy_frame (frame); +        if (!transaction_frame) { +                goto out; +        } + +        transaction_frame->local = local; +        frame->local = NULL; + +        local->op = GF_FOP_ZEROFILL; + +        local->transaction.fop    = afr_zerofill_wind; +        local->transaction.done   = 
afr_zerofill_done; +        local->transaction.unwind = afr_zerofill_unwind; + +        local->transaction.main_frame = frame; + +        local->transaction.start   = local->cont.zerofill.offset; +        local->transaction.len     = 0; + +        op_ret = afr_transaction (transaction_frame, this, +                                  AFR_DATA_TRANSACTION); +        if (op_ret < 0) { +                op_errno = -op_ret; +                goto out; +        } + +        op_ret = 0; +out: +        if (op_ret < 0) { +                if (transaction_frame) { +                        AFR_STACK_DESTROY (transaction_frame); +                } +                AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL, +                                  NULL, NULL); +        } + +        return 0; +} + +int +afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +              size_t len, dict_t *xdata) +{ +        afr_private_t   *priv               = NULL; +        afr_local_t     *local              = NULL; +        call_frame_t    *transaction_frame  = NULL; +        int              ret                = -1; +        int              op_errno           = 0; + +        VALIDATE_OR_GOTO (frame, out); +        VALIDATE_OR_GOTO (this, out); +        VALIDATE_OR_GOTO (this->private, out); + +        priv = this->private; + +        if (afr_is_split_brain (this, fd->inode)) { +                op_errno = EIO; +                goto out; +        } +        QUORUM_CHECK(zerofill, out); + +        AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); +        local = frame->local; + +        ret = afr_local_init (local, priv, &op_errno); +        if (ret < 0) { +                goto out; +        } +        local->cont.zerofill.offset  = offset; +        local->cont.zerofill.len = len; + +        local->fd = fd_ref (fd); + +        afr_open_fd_fix (fd, this); + +        afr_do_zerofill(frame, this); + +        ret = 0; +out: +        if (ret < 0) { +                if 
(transaction_frame) { +                        AFR_STACK_DESTROY (transaction_frame); +                } +                AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, +                                  NULL, NULL); +        } + +        return 0; +} + +/* }}} */ + + diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h index 883faae6cb5..8e93ca44aaa 100644 --- a/xlators/cluster/afr/src/afr-inode-write.h +++ b/xlators/cluster/afr/src/afr-inode-write.h @@ -75,4 +75,8 @@ afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,  int  afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,                 off_t offset, size_t len, dict_t *xdata); + +int +afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +             size_t len, dict_t *xdata);  #endif /* __INODE_WRITE_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index aa8d002209c..c724eb2ae42 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -499,6 +499,7 @@ struct xlator_fops fops = {          .fentrylk    = afr_fentrylk,  	.fallocate   = afr_fallocate,  	.discard     = afr_discard, +        .zerofill    = afr_zerofill,          /* inode read */          .access      = afr_access, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 12dce541cf3..21064db58d9 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -710,6 +710,14 @@ typedef struct _afr_local {  			size_t len;  		} discard; +                struct { +                        off_t offset; +                        size_t len; +                        struct iatt prebuf; +                        struct iatt postbuf; +                } zerofill; + +          } cont;          struct { diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index da8923e9b24..8c3449f0b3b 100644 --- 
a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -695,6 +695,8 @@ int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,  		      int32_t mode, off_t offset, size_t len, dict_t *xdata);  int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,  		    off_t offset, size_t len, dict_t *xdata); +int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, +                    off_t offset, size_t len, dict_t *xdata);  int32_t dht_init (xlator_t *this);  void    dht_fini (xlator_t *this); diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 9bcd84ae15a..26db8a533c1 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -21,6 +21,7 @@ int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);  int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);  int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret);  int dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret); +int dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret);  int  dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -624,6 +625,141 @@ err:          return 0;  } +int +dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                int op_ret, int op_errno, struct iatt *prebuf, +                struct iatt *postbuf, dict_t *xdata) +{ +        dht_local_t  *local = NULL; +        call_frame_t *prev  = NULL; +        int           ret   = -1; + +        GF_VALIDATE_OR_GOTO ("dht", frame, err); +        GF_VALIDATE_OR_GOTO ("dht", this, out); +        GF_VALIDATE_OR_GOTO ("dht", frame->local, out); +        GF_VALIDATE_OR_GOTO ("dht", cookie, out); + +        local = frame->local; +        prev = cookie; + +        if ((op_ret == -1) && (op_errno != ENOENT)) { +                local->op_errno = op_errno; +                local->op_ret = -1; +                
gf_log (this->name, GF_LOG_DEBUG, +                        "subvolume %s returned -1 (%s)", +                        prev->this->name, strerror (op_errno)); +                goto out; +        } + +        if (local->call_cnt != 1) { +                if (local->stbuf.ia_blocks) { +                        dht_iatt_merge (this, postbuf, &local->stbuf, NULL); +                        dht_iatt_merge (this, prebuf, &local->prebuf, NULL); +                } +                goto out; +        } +        local->rebalance.target_op_fn = dht_zerofill2; +        /* Phase 2 of migration */ +        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { +                ret = dht_rebalance_complete_check (this, frame); +                if (!ret) +                        return 0; +        } + +        /* Check if the rebalance phase1 is true */ +        if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { +                dht_iatt_merge (this, &local->stbuf, postbuf, NULL); +                dht_iatt_merge (this, &local->prebuf, prebuf, NULL); +                ret = fd_ctx_get (local->fd, this, NULL); +                if (!ret) { +                        dht_zerofill2 (this, frame, 0); +                        return 0; +                } +                ret = dht_rebalance_in_progress_check (this, frame); +                if (!ret) +                        return 0; +        } + +out: +        DHT_STRIP_PHASE1_FLAGS (postbuf); +        DHT_STRIP_PHASE1_FLAGS (prebuf); +        DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno, +                          prebuf, postbuf, xdata); +err: +        return 0; +} + +int +dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t  *local          = NULL; +        xlator_t     *subvol         = NULL; +        uint64_t      tmp_subvol     = 0; +        int           ret            = -1; + +        local = frame->local; + +        if (local->fd) +                ret = fd_ctx_get (local->fd, this, &tmp_subvol); +     
   if (!ret) +                subvol = (xlator_t *)(long)tmp_subvol; + +        if (!subvol) +                subvol = local->cached_subvol; + +        local->call_cnt = 2; /* This is the second attempt */ + +        STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill, +                   local->fd, local->rebalance.offset, local->rebalance.size, +                   NULL); + +        return 0; +} + +int +dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +            size_t len, dict_t *xdata) +{ +        xlator_t     *subvol       = NULL; +        int           op_errno     = -1; +        dht_local_t  *local        = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        local->rebalance.offset = offset; +        local->rebalance.size = len; + +        local->call_cnt = 1; +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill, +                    fd, offset, len, xdata); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? 
errno : op_errno; +        DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); + +        return 0; +} + + +  /* handle cases of migration here for 'setattr()' calls */  int  dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 0349b63a91f..fc0ca2f7735 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -72,6 +72,7 @@ struct xlator_fops fops = {          .fsetattr    = dht_fsetattr,  	.fallocate   = dht_fallocate,  	.discard     = dht_discard, +        .zerofill    = dht_zerofill,  };  struct xlator_dumpops dumpops = { diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index 8faaec19673..d366b352c5b 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -4107,6 +4107,191 @@ err:  }  int32_t +stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                   struct iatt *postbuf, dict_t *xdata) +{ +        int32_t         callcnt    = 0; +        stripe_local_t *local      = NULL; +        stripe_local_t *mlocal     = NULL; +        call_frame_t   *prev       = NULL; +        call_frame_t   *mframe     = NULL; + +        if (!this || !frame || !frame->local || !cookie) { +                gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); +                goto out; +        } + +        prev  = cookie; +        local = frame->local; +        mframe = local->orig_frame; +        mlocal = mframe->local; + +        LOCK(&frame->lock); +        { +                callcnt = ++mlocal->call_count; + +                if (op_ret == 0) { +                        mlocal->post_buf = *postbuf; +                        mlocal->pre_buf = *prebuf; + +                        mlocal->prebuf_blocks  += prebuf->ia_blocks; +                        
mlocal->postbuf_blocks += postbuf->ia_blocks; + +                        correct_file_size(prebuf, mlocal->fctx, prev); +                        correct_file_size(postbuf, mlocal->fctx, prev); + +                        if (mlocal->prebuf_size < prebuf->ia_size) +                                mlocal->prebuf_size = prebuf->ia_size; +                        if (mlocal->postbuf_size < postbuf->ia_size) +                                mlocal->postbuf_size = postbuf->ia_size; +                } + +                /* return the first failure */ +                if (mlocal->op_ret == 0) { +                        mlocal->op_ret = op_ret; +                        mlocal->op_errno = op_errno; +                } +        } +        UNLOCK (&frame->lock); + +        if ((callcnt == mlocal->wind_count) && mlocal->unwind) { +                mlocal->pre_buf.ia_size = mlocal->prebuf_size; +                mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; +                mlocal->post_buf.ia_size = mlocal->postbuf_size; +                mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + +                STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret, +                                     mlocal->op_errno, &mlocal->pre_buf, +                                     &mlocal->post_buf, NULL); +        } +out: +        STRIPE_STACK_DESTROY(frame); +        return 0; +} + +int32_t +stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +               size_t len, dict_t *xdata) +{ +        stripe_local_t   *local            = NULL; +        stripe_fd_ctx_t  *fctx             = NULL; +        int32_t           op_errno         = 1; +        int32_t           idx              = 0; +        int32_t           offset_offset    = 0; +        int32_t           remaining_size   = 0; +        off_t             fill_size        = 0; +        uint64_t          stripe_size      = 0; +        uint64_t          tmp_fctx         = 0; +        off_t             
dest_offset      = 0; +        call_frame_t     *fframe           = NULL; +        stripe_local_t   *flocal           = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); +        VALIDATE_OR_GOTO (fd->inode, err); + +        inode_ctx_get (fd->inode, this, &tmp_fctx); +        if (!tmp_fctx) { +                op_errno = EINVAL; +                goto err; +        } +        fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; +        stripe_size = fctx->stripe_size; + +        STRIPE_VALIDATE_FCTX (fctx, err); + +        remaining_size = len; + +        local = mem_get0 (this->local_pool); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } +        frame->local = local; +        local->stripe_size = stripe_size; +        local->fctx = fctx; + +        if (!stripe_size) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "Wrong stripe size for the file"); +                op_errno = EINVAL; +                goto err; +        } + +        while (1) { +                fframe = copy_frame(frame); +                flocal = mem_get0(this->local_pool); +                if (!flocal) { +                        op_errno = ENOMEM; +                        goto err; +                } +                flocal->orig_frame = 
frame; +                fframe->local = flocal; + +                idx = (((offset + offset_offset) / +                        local->stripe_size) % fctx->stripe_count); + +                fill_size = (local->stripe_size - +                             ((offset + offset_offset) % local->stripe_size)); +                if (fill_size > remaining_size) +                        fill_size = remaining_size; + +                remaining_size -= fill_size; + +                local->wind_count++; +                if (remaining_size == 0) +                        local->unwind = 1; + +                dest_offset = offset + offset_offset; +                if (fctx->stripe_coalesce) +                        dest_offset = coalesced_offset(dest_offset, +                                                       local->stripe_size, +                                                       fctx->stripe_count); + +                STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx], +                           fctx->xl_array[idx]->fops->zerofill, fd, +                           dest_offset, fill_size, xdata); +                offset_offset += fill_size; +                if (remaining_size == 0) +                        break; +        } + +        return 0; +err: +        if (fframe) +                STRIPE_STACK_DESTROY(fframe); + +        STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); +        return 0; +} + +int32_t  stripe_release (xlator_t *this, fd_t *fd)  {  	return 0; @@ -5558,6 +5743,7 @@ struct xlator_fops fops = {          .readdirp       = stripe_readdirp,  	.fallocate	= stripe_fallocate,  	.discard	= stripe_discard, +        .zerofill       = stripe_zerofill,  };  struct xlator_cbks cbks = { diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 4f27a2e418f..7fb697ae45d 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -1746,6 +1746,16 @@ 
io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,  	return 0;  } +int +io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                     struct iatt *postbuf, dict_t *xdata) +{ +        UPDATE_PROFILE_STATS(frame, ZEROFILL); +        STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf, +                            xdata); +        return 0; +}  int  io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -2441,6 +2451,18 @@ io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,  	return 0;  } +int +io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +                 size_t len, dict_t *xdata) +{ +        START_FOP_LATENCY(frame); + +        STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + +        return 0; +} +  int  io_stats_lk (call_frame_t *frame, xlator_t *this, @@ -2870,6 +2892,7 @@ struct xlator_fops fops = {          .fsetattr    = io_stats_fsetattr,  	.fallocate   = io_stats_fallocate,  	.discard     = io_stats_discard, +        .zerofill    = io_stats_zerofill,  };  struct xlator_cbks cbks = { diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index 59152db43c7..6a2c856913e 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1953,6 +1953,73 @@ err:          return 0;  } +int32_t +marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                   int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                   struct iatt *postbuf, dict_t *xdata) +{ +        marker_local_t     *local   = NULL; +        marker_conf_t      *priv    = NULL; + +        if (op_ret == -1) { +                gf_log (this->name, GF_LOG_TRACE, "%s occurred during 
zerofill", +                        strerror (op_errno)); +        } + +        local = (marker_local_t *) frame->local; + +        frame->local = NULL; + +        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, +                             postbuf, xdata); + +        if (op_ret == -1 || local == NULL) +                goto out; + +        priv = this->private; + +        if (priv->feature_enabled & GF_QUOTA) +                mq_initiate_quota_txn (this, &local->loc); + +        if (priv->feature_enabled & GF_XTIME) +                marker_xtime_update_marks (this, local); +out: +        marker_local_unref (local); + +        return 0; +} + +int32_t +marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +               size_t len, dict_t *xdata) +{ +        int32_t          ret   = 0; +        marker_local_t  *local = NULL; +        marker_conf_t   *priv  = NULL; + +        priv = this->private; + +        if (priv->feature_enabled == 0) +                goto wind; + +        local = mem_get0 (this->local_pool); + +        MARKER_INIT_LOCAL (frame, local); + +        ret = marker_inode_loc_fill (fd->inode, &local->loc); + +        if (ret == -1) +                goto err; +wind: +        STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this), +                    FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); +        return 0; +err: +        STACK_UNWIND_STRICT (zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + +        return 0; +} +  /* when a call from the special client is received on   * key trusted.glusterfs.volume-mark with value "RESET" @@ -2778,6 +2845,7 @@ struct xlator_fops fops = {          .readdirp    = marker_readdirp,  	.fallocate   = marker_fallocate,  	.discard     = marker_discard, +        .zerofill    = marker_zerofill,  };  struct xlator_cbks cbks = { diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 68ca4c56515..201777b380e 
100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -1449,6 +1449,31 @@ ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,         return 0;  } +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                int32_t op_ret, int32_t op_errno, struct iatt *pre, +                struct iatt *post, dict_t *xdata) +{ +        STACK_UNWIND_STRICT(zerofill, frame, op_ret, +                            op_errno, pre, post, xdata); +        return 0; +} + +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +            size_t len, dict_t *xdata) +{ +        uint64_t ioc_inode = 0; + +        inode_ctx_get (fd->inode, this, &ioc_inode); + +        if (ioc_inode) +                ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + +        STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); +       return 0; +}  int32_t @@ -2077,6 +2102,7 @@ struct xlator_fops fops = {          .readdirp    = ioc_readdirp,  	.discard     = ioc_discard, +        .zerofill    = ioc_zerofill,  }; diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index a5fcd0300d0..bbcf4ed26ca 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -309,6 +309,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)          case GF_FOP_RCHECKSUM:  	case GF_FOP_FALLOCATE:  	case GF_FOP_DISCARD: +        case GF_FOP_ZEROFILL:                  pri = IOT_PRI_LO;                  break; @@ -2510,6 +2511,55 @@ out:  }  int +iot_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                  int32_t op_ret, int32_t op_errno, +                  struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ +        
STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, preop, postop, +                             xdata); +        return 0; +} + +int +iot_zerofill_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, +                     off_t offset, size_t len, dict_t *xdata) +{ +        STACK_WIND (frame, iot_zerofill_cbk, FIRST_CHILD (this), +                    FIRST_CHILD (this)->fops->zerofill, fd, offset, len, xdata); +        return 0; +} + +int +iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +            size_t len, dict_t *xdata) +{ +        call_stub_t     *stub     = NULL; +        int              ret      = -1; + +        stub = fop_zerofill_stub(frame, iot_zerofill_wrapper, fd, +                                 offset, len, xdata); +        if (!stub) { +                gf_log (this->name, GF_LOG_ERROR, "cannot create zerofill stub" +                        "(out of memory)"); +                ret = -ENOMEM; +                goto out; +        } + +        ret = iot_schedule (frame, this, stub); + +out: +        if (ret < 0) { +                STACK_UNWIND_STRICT (zerofill, frame, -1, -ret, NULL, NULL, +                                     NULL); +                if (stub != NULL) { +                        call_stub_destroy (stub); +                } +        } +        return 0; +} + + +int  __iot_workers_scale (iot_conf_t *conf)  {          int       scale = 0; @@ -2840,6 +2890,7 @@ struct xlator_fops fops = {          .rchecksum   = iot_rchecksum,  	.fallocate   = iot_fallocate,  	.discard     = iot_discard, +        .zerofill    = iot_zerofill,  };  struct xlator_cbks cbks; diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 36d81887c7b..3a5b7a5d1ae 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -2098,6 +2098,46 @@ int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,  }  int 
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                int32_t op_ret, int32_t op_errno, +                struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ +        mdc_local_t  *local = NULL; + +        local = frame->local; + +        if (op_ret != 0) +                goto out; + +        if (!local) +                goto out; + +        mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: +        MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, +                         xdata); + +        return 0; +} + +int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +                size_t len, dict_t *xdata) +{ +        mdc_local_t *local; + +        local = mdc_local_get(frame); +        local->fd = fd_ref(fd); + +        STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->zerofill, fd, offset, len, +                   xdata); + +        return 0; +} + + +int  mdc_forget (xlator_t *this, inode_t *inode)  {          mdc_inode_wipe (this, inode); @@ -2229,6 +2269,7 @@ struct xlator_fops fops = {  	.readdir     = mdc_readdir,  	.fallocate   = mdc_fallocate,  	.discard     = mdc_discard, +        .zerofill    = mdc_zerofill,  }; diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index df4027509a9..7e5b5727872 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -720,6 +720,26 @@ err:  }  int +ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +           size_t len, dict_t *xdata) +{ +        call_stub_t *stub; + +        stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, +                                 offset, len, xdata); +        if (!stub) +                goto err; + +        open_and_resume(this, fd, stub); + +        return 0; +err: +        
STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); +        return 0; +} + + +int  ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,  	   dict_t *xdata)  { @@ -946,6 +966,7 @@ struct xlator_fops fops = {  	.fsetattr    = ob_fsetattr,  	.fallocate   = ob_fallocate,  	.discard     = ob_discard, +        .zerofill    = ob_zerofill,  	.unlink      = ob_unlink,  	.rename      = ob_rename,  	.lk          = ob_lk, diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index 241fa477fda..069ab1f1a91 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -993,6 +993,57 @@ unwind:  }  int +ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +               int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +               struct iatt *postbuf, dict_t *xdata) +{ +        GF_ASSERT (frame); + +        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, +                             postbuf, xdata); +        return 0; +} + +static int +ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +             size_t len, dict_t *xdata) +{ +        ra_file_t *file    = NULL; +        fd_t      *iter_fd = NULL; +        inode_t   *inode   = NULL; +        uint64_t  tmp_file = 0; +        int32_t   op_errno = EINVAL; + +        GF_ASSERT (frame); +        GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); +        GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + +        inode = fd->inode; + +        LOCK (&inode->lock); +        { +                list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { +                        fd_ctx_get (iter_fd, this, &tmp_file); +                        file = (ra_file_t *)(long)tmp_file; +                        if (!file) +                                continue; + +                        flush_region(frame, file, offset, len, 
1); +                } +        } +        UNLOCK (&inode->lock); + +        STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this), +                    FIRST_CHILD (this)->fops->zerofill, fd, +                    offset, len, xdata); +        return 0; + +unwind: +        STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL); +        return 0; +} + +int  ra_priv_dump (xlator_t *this)  {          ra_conf_t       *conf                           = NULL; @@ -1173,6 +1224,7 @@ struct xlator_fops fops = {          .ftruncate   = ra_ftruncate,          .fstat       = ra_fstat,  	.discard     = ra_discard, +        .zerofill    = ra_zerofill,  };  struct xlator_cbks cbks = { diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index 38f4391145d..6355450c393 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -2048,6 +2048,62 @@ out:  }  int +client3_3_zerofill_cbk(struct rpc_req *req, struct iovec *iov, int count, +                      void *myframe) +{ +        call_frame_t    *frame         = NULL; +        gfs3_zerofill_rsp rsp          = {0,}; +        struct iatt      prestat       = {0,}; +        struct iatt      poststat      = {0,}; +        int              ret           = 0; +        xlator_t *this                 = NULL; +        dict_t  *xdata                 = NULL; + +        this = THIS; + +        frame = myframe; + +        if (-1 == req->rpc_status) { +                rsp.op_ret   = -1; +                rsp.op_errno = ENOTCONN; +                goto out; +        } +        ret = xdr_to_generic(*iov, &rsp, (xdrproc_t) xdr_gfs3_zerofill_rsp); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed"); +                rsp.op_ret   = -1; +                rsp.op_errno = EINVAL; +                goto out; +        } + +        if (-1 != rsp.op_ret) { +                gf_stat_to_iatt 
(&rsp.statpre, &prestat); +                gf_stat_to_iatt (&rsp.statpost, &poststat); +        } + +        GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val), +                                      (rsp.xdata.xdata_len), ret, +                                      rsp.op_errno, out); + +out: +        if (rsp.op_ret == -1) { +                gf_log (this->name, GF_LOG_WARNING, +                        "remote operation failed: %s", +                        strerror (gf_error_to_errno (rsp.op_errno))); +        } +        CLIENT_STACK_UNWIND (zerofill, frame, rsp.op_ret, +                             gf_error_to_errno (rsp.op_errno), &prestat, +                             &poststat, xdata); + +        free (rsp.xdata.xdata_val); + +        if (xdata) +                dict_unref (xdata); + +        return 0; +} + +int  client3_3_setattr_cbk (struct rpc_req *req, struct iovec *iov, int count,                         void *myframe)  { @@ -5987,6 +6043,50 @@ unwind:          return 0;  } +int32_t +client3_3_zerofill(call_frame_t *frame, xlator_t *this, void *data) +{ +        clnt_args_t       *args        = NULL; +        int64_t            remote_fd   = -1; +        clnt_conf_t       *conf        = NULL; +        gfs3_zerofill_req   req        = {{0},}; +        int                op_errno    = ESTALE; +        int                ret         = 0; + +        if (!frame || !this || !data) +                goto unwind; + +        args = data; +        conf = this->private; + +        CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD, +                              remote_fd, op_errno, unwind); + +        req.fd = remote_fd; +        req.offset = args->offset; +        req.size = args->size; +        memcpy(req.gfid, args->fd->inode->gfid, 16); + +        GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val), +                                    req.xdata.xdata_len, op_errno, unwind); + +        ret = client_submit_request(this, &req, 
frame, conf->fops, +                                    GFS3_OP_ZEROFILL, client3_3_zerofill_cbk, +                                    NULL, NULL, 0, NULL, 0, NULL, +                                    (xdrproc_t) xdr_gfs3_zerofill_req); +        if (ret) +                gf_log (this->name, GF_LOG_WARNING, "failed to send the fop"); + +        GF_FREE (req.xdata.xdata_val); + +        return 0; +unwind: +        CLIENT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); +        GF_FREE (req.xdata.xdata_val); + +        return 0; +} +  /* Table Specific to FOPS */ @@ -6034,6 +6134,7 @@ rpc_clnt_procedure_t clnt3_3_fop_actors[GF_FOP_MAXVALUE] = {          [GF_FOP_READDIRP]    = { "READDIRP",    client3_3_readdirp },  	[GF_FOP_FALLOCATE]   = { "FALLOCATE",	client3_3_fallocate },  	[GF_FOP_DISCARD]     = { "DISCARD",	client3_3_discard }, +        [GF_FOP_ZEROFILL]    = { "ZEROFILL",    client3_3_zerofill},          [GF_FOP_RELEASE]     = { "RELEASE",     client3_3_release },          [GF_FOP_RELEASEDIR]  = { "RELEASEDIR",  client3_3_releasedir },          [GF_FOP_GETSPEC]     = { "GETSPEC",     client3_getspec }, @@ -6088,6 +6189,8 @@ char *clnt3_3_fop_names[GFS3_OP_MAXVALUE] = {          [GFS3_OP_FREMOVEXATTR] = "FREMOVEXATTR",  	[GFS3_OP_FALLOCATE]   = "FALLOCATE",  	[GFS3_OP_DISCARD]     = "DISCARD", +        [GFS3_OP_ZEROFILL]    = "ZEROFILL", +  };  rpc_clnt_prog_t clnt3_3_fop_prog = { diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 7703c6e8fba..1f7d13ea452 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -2031,6 +2031,42 @@ out:  }  int32_t +client_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +               size_t len, dict_t *xdata) +{ +        int          ret              = -1; +        clnt_conf_t *conf             = NULL; +        rpc_clnt_procedure_t *proc    = NULL; +        clnt_args_t  args             = {0,}; + +        
conf = this->private; +        if (!conf || !conf->fops) +                goto out; + +        args.fd = fd; +        args.offset = offset; +        args.size = len; +        args.xdata = xdata; + +        proc = &conf->fops->proctable[GF_FOP_ZEROFILL]; +        if (!proc) { +                gf_log (this->name, GF_LOG_ERROR, +                        "rpc procedure not found for %s", +                        gf_fop_list[GF_FOP_ZEROFILL]); +                goto out; +        } +        if (proc->fn) +                ret = proc->fn (frame, this, &args); +out: +        if (ret) +                STACK_UNWIND_STRICT(zerofill, frame, -1, ENOTCONN, +                                    NULL, NULL, NULL); + +        return 0; +} + + +int32_t  client_getspec (call_frame_t *frame, xlator_t *this, const char *key,                  int32_t flags)  { @@ -2749,6 +2785,7 @@ struct xlator_fops fops = {          .fsetattr    = client_fsetattr,  	.fallocate   = client_fallocate,  	.discard     = client_discard, +        .zerofill    = client_zerofill,          .getspec     = client_getspec,  }; diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index 59e808b2f14..d2da9aa769c 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -1993,6 +1993,46 @@ out:          return 0;  } +int +server_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                   int32_t op_ret, int32_t op_errno, +                   struct iatt *statpre, struct iatt *statpost, dict_t *xdata) +{ +        gfs3_zerofill_rsp  rsp    = {0,}; +        server_state_t    *state  = NULL; +        rpcsvc_request_t  *req    = NULL; + +        req = frame->local; +        state  = CALL_STATE (frame); + +        GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val), +                                    rsp.xdata.xdata_len, op_errno, out); + +        if (op_ret) { +                
gf_log (this->name, GF_LOG_INFO, +                        "%"PRId64": ZEROFILL%"PRId64" (%s) ==> (%s)", +                        frame->root->unique, state->resolve.fd_no, +                        uuid_utoa (state->resolve.gfid), +                        strerror (op_errno)); /* NOTE(review): "ZEROFILL%" prints the fd number glued to the fop name; a separating space looks intended - confirm */ +                goto out; +        } + +        gf_stat_from_iatt (&rsp.statpre, statpre); +        gf_stat_from_iatt (&rsp.statpost, statpost); + +out: +        rsp.op_ret    = op_ret; +        rsp.op_errno  = gf_errno_to_error (op_errno); + +        server_submit_reply(frame, req, &rsp, NULL, 0, NULL, +                            (xdrproc_t) xdr_gfs3_zerofill_rsp); + +        GF_FREE (rsp.xdata.xdata_val); + +        return 0; +} + +  /* Resume function section */  int @@ -3019,6 +3059,28 @@ err:          return 0;  } +int /* server_zerofill_resume: handed to resolve_and_resume by server3_3_zerofill. If state->resolve.op_ret is non-zero it calls server_zerofill_cbk directly with the resolve error and NULL stats; otherwise it winds zerofill to bound_xl with state->fd, state->offset, state->size and state->xdata. Always returns 0. */ +server_zerofill_resume (call_frame_t *frame, xlator_t *bound_xl) +{ +        server_state_t *state = NULL; + +        state = CALL_STATE (frame); + +        if (state->resolve.op_ret != 0) +                goto err; + +        STACK_WIND (frame, server_zerofill_cbk, +                    bound_xl, bound_xl->fops->zerofill, +                    state->fd, state->offset, state->size, state->xdata); +        return 0; +err: +        server_zerofill_cbk(frame, NULL, frame->this, state->resolve.op_ret, +                           state->resolve.op_errno, NULL, NULL, NULL); + +        return 0; +} + +  /* Fop section */ @@ -3322,6 +3384,65 @@ out:  int /* server3_3_zerofill: RPC actor registered for GFS3_OP_ZEROFILL. Decodes a gfs3_zerofill_req, obtains a frame, fills the resolve (RESOLVE_MUST, fd_no, gfid), offset, size and xdata state, then calls resolve_and_resume with server_zerofill_resume. Sets req->rpc_err = GARBAGE_ARGS when decoding fails, no frame is available, bound_xl is unset, or op_errno ends up non-zero. */ +server3_3_zerofill(rpcsvc_request_t *req) +{ +        server_state_t       *state      = NULL; +        call_frame_t         *frame      = NULL; +        gfs3_zerofill_req     args       = {{0},}; +        int                   ret        = -1; +        int                   op_errno   = 0; + +        if (!req) +                return ret; + +        ret = xdr_to_generic (req->msg[0], &args, +                              (xdrproc_t)xdr_gfs3_zerofill_req); +        if (ret < 0) { +                /*failed to decode msg*/; /* NOTE(review): the ';' after the comment above is a stray empty statement */ +      
          req->rpc_err = GARBAGE_ARGS; +                goto out; +        } + +        frame = get_frame_from_request (req); +        if (!frame) { +                /* something wrong, mostly insufficient memory*/ +                req->rpc_err = GARBAGE_ARGS; /* TODO */ +                goto out; +        } +        frame->root->op = GF_FOP_ZEROFILL; + +        state = CALL_STATE (frame); +        if (!frame->root->client->bound_xl) { +                /* auth failure, request on subvolume without setvolume */ +                req->rpc_err = GARBAGE_ARGS; +                goto out; +        } + +        state->resolve.type   = RESOLVE_MUST; +        state->resolve.fd_no  = args.fd; + +        state->offset = args.offset; +        state->size = args.size; +        memcpy(state->resolve.gfid, args.gfid, 16); + +        GF_PROTOCOL_DICT_UNSERIALIZE (frame->root->client->bound_xl, state->xdata, +                                      (args.xdata.xdata_val), +                                      (args.xdata.xdata_len), ret, +                                      op_errno, out); + +        ret = 0; +        resolve_and_resume (frame, server_zerofill_resume); + +out: +        free (args.xdata.xdata_val); /* NOTE(review): plain free() rather than GF_FREE - presumably because the buffer is allocated by the XDR decoder; confirm */ + +        if (op_errno) +                req->rpc_err = GARBAGE_ARGS; + +        return ret; +} + +int  server3_3_readlink (rpcsvc_request_t *req)  {          server_state_t    *state                 = NULL; @@ -6040,6 +6161,7 @@ rpcsvc_actor_t glusterfs3_3_fop_actors[] = {          [GFS3_OP_FREMOVEXATTR] = {"FREMOVEXATTR", GFS3_OP_FREMOVEXATTR, server3_3_fremovexattr, NULL, 0, DRC_NA},          [GFS3_OP_FALLOCATE]    = {"FALLOCATE",    GFS3_OP_FALLOCATE,    server3_3_fallocate,    NULL, 0, DRC_NA},          [GFS3_OP_DISCARD]      = {"DISCARD",      GFS3_OP_DISCARD,      server3_3_discard,      NULL, 0, DRC_NA}, +        [GFS3_OP_ZEROFILL]    =  {"ZEROFILL",     GFS3_OP_ZEROFILL,     server3_3_zerofill,     NULL, 0, DRC_NA},  }; diff --git 
a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 93ece2474f9..fb45c7a6746 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -24,6 +24,7 @@  #include <ftw.h>  #include <sys/stat.h>  #include <signal.h> +#include <sys/uio.h>  #ifndef GF_BSD_HOST_OS  #include <alloca.h> @@ -616,6 +617,166 @@ out:          return ret;  } +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ +        char            *alloc_buf = NULL; +        char            *buf = NULL; + +        alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); +        if (!alloc_buf) +                goto out; +        /* page aligned buffer */ +        buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); +        *aligned_buf = buf; +out: +        return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct) +{ +        size_t              num_vect            = 0; +        int32_t             num_loop            = 1; +        int32_t             idx                 = 0; +        int32_t             op_ret              = -1; +        int32_t             vect_size           = VECTOR_SIZE; +        size_t              remain              = 0; +        size_t              extra               = 0; +        struct iovec       *vector              = NULL; +        char               *iov_base            = NULL; +        char               *alloc_buf           = NULL; + +        if (len == 0) +                return 0; +        if (len < VECTOR_SIZE) +                vect_size = len; + +        num_vect = len / (vect_size); +        remain = len % vect_size ; +        if (num_vect > MAX_NO_VECT) { +                extra = num_vect % MAX_NO_VECT; +                num_loop = num_vect / MAX_NO_VECT; +                num_vect = MAX_NO_VECT; +        } + +        vector = GF_CALLOC (num_vect, sizeof(struct iovec), +                             gf_common_mt_iovec); +        if (!vector) +          
        return -1; +        if (o_direct) { +                alloc_buf = _page_aligned_alloc(vect_size, &iov_base); +                if (!alloc_buf) { +                        gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, +                                 "memory alloc failed, vect_size %d: %s", +                                  vect_size, strerror(errno)); +                        GF_FREE(vector); +                        return -1; +                } +        } else { +                iov_base = GF_CALLOC (vect_size, sizeof(char), +                                        gf_common_mt_char); +                if (!iov_base) { +                        GF_FREE(vector); +                        return -1; +                 } +        } + +        for (idx = 0; idx < num_vect; idx++) { +                vector[idx].iov_base = iov_base; +                vector[idx].iov_len  = vect_size; +        } +        lseek(fd, offset, SEEK_SET); +        for (idx = 0; idx < num_loop; idx++) { +                op_ret = writev(fd, vector, num_vect); +                if (op_ret < 0) +                        goto err; +        } +        if (extra) { +                op_ret = writev(fd, vector, extra); +                if (op_ret < 0) +                        goto err; +        } +        if (remain) { +                vector[0].iov_len = remain; +                op_ret = writev(fd, vector , 1); +                if (op_ret < 0) +                        goto err; +        } +err: +        if (o_direct) +                GF_FREE(alloc_buf); +        else +                GF_FREE(iov_base); +        GF_FREE(vector); +        return op_ret; +} + +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, +                  off_t offset, size_t len, struct iatt *statpre, +                  struct iatt *statpost) +{ +        struct posix_fd *pfd       = NULL; +        int32_t          ret       = -1; + +        DECLARE_OLD_FS_ID_VAR; + +        SET_FS_ID 
(frame->root->uid, frame->root->gid); + +        VALIDATE_OR_GOTO (frame, out); +        VALIDATE_OR_GOTO (this, out); +        VALIDATE_OR_GOTO (fd, out); + +        ret = posix_fd_ctx_get (fd, this, &pfd); +        if (ret < 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "pfd is NULL from fd=%p", fd); +                goto out; +        } + +        ret = posix_fdstat (this, pfd->fd, statpre); +        if (ret == -1) { +                ret = -errno; +                gf_log (this->name, GF_LOG_ERROR, +                        "pre-operation fstat failed on fd = %p: %s", fd, +                        strerror (errno)); +                goto out; +        } +        ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); +        if (ret < 0) { +                ret = -errno; +                gf_log(this->name, GF_LOG_ERROR, +                       "zerofill failed on fd %d length %ld %s", +                        pfd->fd, len, strerror(errno)); +                goto out; +        } +        if (pfd->flags & (O_SYNC|O_DSYNC)) { +                ret = fsync (pfd->fd); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "fsync() in writev on fd %d failed: %s", +                        pfd->fd, strerror (errno)); +                        ret = -errno; +                        goto out; +                } +        } + +        ret = posix_fdstat (this, pfd->fd, statpost); +        if (ret == -1) { +                ret = -errno; +                gf_log (this->name, GF_LOG_ERROR, +                        "post operation fstat failed on fd=%p: %s", fd, +                        strerror (errno)); +                goto out; +        } + +out: +        SET_TO_OLD_FS_ID (); + +        return ret; +} +  static int32_t  _posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,  		off_t offset, size_t len, dict_t *xdata) @@ -664,6 +825,28 @@ 
err:  } +static int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +                size_t len, dict_t *xdata) +{ +        int32_t ret                      =  0; +        struct  iatt statpre             = {0,}; +        struct  iatt statpost            = {0,}; + +        ret = posix_do_zerofill(frame, this, fd, offset, len, +                                 &statpre, &statpost); +        if (ret < 0) +                goto err; + +        STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); +        return 0; + +err: +        STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); +        return 0; + +} +  int32_t  posix_opendir (call_frame_t *frame, xlator_t *this,                 loc_t *loc, fd_t *fd, dict_t *xdata) @@ -2117,22 +2300,6 @@ err:          return op_ret;  } -char* -_page_aligned_alloc (size_t size, char **aligned_buf) -{ -        char            *alloc_buf = NULL; -        char            *buf = NULL; - -        alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); -        if (!alloc_buf) -                goto out; -        /* page aligned buffer */ -        buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); -        *aligned_buf = buf; -out: -        return alloc_buf; -} -  int32_t  __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,                  int odirect) @@ -4938,6 +5105,7 @@ struct xlator_fops fops = {          .fsetattr    = posix_fsetattr,  	.fallocate   = _posix_fallocate,  	.discard     = posix_discard, +        .zerofill    = posix_zerofill,  };  struct xlator_cbks cbks = { diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index a957e18768c..3121db2717e 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -50,6 +50,8 @@  #include "posix-aio.h"  #endif +#define VECTOR_SIZE 64 * 1024 /* vector size 64KB*/ +#define MAX_NO_VECT 1024  /**   * posix_fd - internal structure common to 
file and directory fd's   */  | 
