On Wed, Jul 1, 2020 at 9:22 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > On Wed, 2020-07-01 at 21:15 +0200, Ilya Dryomov wrote: > > On Wed, Jul 1, 2020 at 5:54 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > > ceph_osdc_new_request is cephfs specific. Move it and calc_layout into > > > ceph.ko. Also, calc_layout can be void return. > > > > > > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> > > > --- > > > fs/ceph/Makefile | 2 +- > > > fs/ceph/addr.c | 1 + > > > fs/ceph/file.c | 1 + > > > fs/ceph/osdc.c | 113 +++++++++++++++++++++++++++++++ > > > fs/ceph/osdc.h | 16 +++++ > > > include/linux/ceph/osd_client.h | 10 --- > > > net/ceph/osd_client.c | 115 -------------------------------- > > > 7 files changed, 132 insertions(+), 126 deletions(-) > > > create mode 100644 fs/ceph/osdc.c > > > create mode 100644 fs/ceph/osdc.h > > > > > > diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile > > > index 50c635dc7f71..f2ec52fa8d37 100644 > > > --- a/fs/ceph/Makefile > > > +++ b/fs/ceph/Makefile > > > @@ -8,7 +8,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o > > > ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ > > > export.o caps.o snap.o xattr.o quota.o io.o \ > > > mds_client.o mdsmap.o strings.o ceph_frag.o \ > > > - debugfs.o util.o metric.o > > > + debugfs.o util.o metric.o osdc.o > > > > > > ceph-$(CONFIG_CEPH_FSCACHE) += cache.o > > > ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o > > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > > > index 01ad09733ac7..1a3cc1875a31 100644 > > > --- a/fs/ceph/addr.c > > > +++ b/fs/ceph/addr.c > > > @@ -19,6 +19,7 @@ > > > #include "metric.h" > > > #include <linux/ceph/osd_client.h> > > > #include <linux/ceph/striper.h> > > > +#include "osdc.h" > > > > > > /* > > > * Ceph address space ops. > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > > > index 160644ddaeed..b697a1f3c56e 100644 > > > --- a/fs/ceph/file.c > > > +++ b/fs/ceph/file.c > > > @@ -18,6 +18,7 @@ > > > #include "cache.h" > > > #include "io.h" > > > #include "metric.h" > > > +#include "osdc.h" > > > > > > static __le32 ceph_flags_sys2wire(u32 flags) > > > { > > > diff --git a/fs/ceph/osdc.c b/fs/ceph/osdc.c > > > new file mode 100644 > > > index 000000000000..303e39fce3d4 > > > --- /dev/null > > > +++ b/fs/ceph/osdc.c > > > @@ -0,0 +1,113 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > +#include <linux/ceph/ceph_debug.h> > > > +#include <linux/ceph/libceph.h> > > > +#include <linux/ceph/osd_client.h> > > > +#include <linux/ceph/striper.h> > > > +#include "osdc.h" > > > + > > > +/* > > > + * calculate the mapping of a file extent onto an object, and fill out the > > > + * request accordingly. shorten extent as necessary if it crosses an > > > + * object boundary. > > > + * > > > + * fill osd op in request message. > > > + */ > > > +static void calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, > > > + u64 *objnum, u64 *objoff, u64 *objlen) > > > +{ > > > + u64 orig_len = *plen; > > > + u32 xlen; > > > + > > > + /* object extent? */ > > > + ceph_calc_file_object_mapping(layout, off, orig_len, objnum, > > > + objoff, &xlen); > > > + *objlen = xlen; > > > + if (*objlen < orig_len) { > > > + *plen = *objlen; > > > + dout(" skipping last %llu, final file extent %llu~%llu\n", > > > + orig_len - *plen, off, *plen); > > > + } > > > + > > > + dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); > > > +} > > > + > > > +/* > > > + * build new request AND message, calculate layout, and adjust file > > > + * extent as needed. > > > + * > > > + * if the file was recently truncated, we include information about its > > > + * old and new size so that the object can be updated appropriately. (we > > > + * avoid synchronously deleting truncated objects because it's slow.) > > > + */ > > > +struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, > > > + struct ceph_file_layout *layout, > > > + struct ceph_vino vino, > > > + u64 off, u64 *plen, > > > + unsigned int which, int num_ops, > > > + int opcode, int flags, > > > + struct ceph_snap_context *snapc, > > > + u32 truncate_seq, > > > + u64 truncate_size, > > > + bool use_mempool) > > > +{ > > > + struct ceph_osd_request *req; > > > + u64 objnum = 0; > > > + u64 objoff = 0; > > > + u64 objlen = 0; > > > + int r; > > > + > > > + BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && > > > + opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && > > > + opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE); > > > + > > > + req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, > > > + GFP_NOFS); > > > + if (!req) > > > + return ERR_PTR(-ENOMEM); > > > + > > > + /* calculate max write size */ > > > + calc_layout(layout, off, plen, &objnum, &objoff, &objlen); > > > + > > > + if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { > > > + osd_req_op_init(req, which, opcode, 0); > > > + } else { > > > + u32 object_size = layout->object_size; > > > + u32 object_base = off - objoff; > > > + if (!(truncate_seq == 1 && truncate_size == -1ULL)) { > > > + if (truncate_size <= object_base) { > > > + truncate_size = 0; > > > + } else { > > > + truncate_size -= object_base; > > > + if (truncate_size > object_size) > > > + truncate_size = object_size; > > > + } > > > + } > > > + osd_req_op_extent_init(req, which, opcode, objoff, objlen, > > > + truncate_size, truncate_seq); > > > + } > > > + > > > + req->r_base_oloc.pool = layout->pool_id; > > > + req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); > > > + req->r_flags = flags | osdc->client->options->read_from_replica; > > > + ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); > > > + > > > + req->r_snapid = vino.snap; > > > + if (flags & CEPH_OSD_FLAG_WRITE) > > > + req->r_data_offset = off; > > > + > > > + if (num_ops > 1) > > > + /* > > > + * This is a special case for ceph_writepages_start(), but it > > > + * also covers ceph_uninline_data(). If more multi-op request > > > + * use cases emerge, we will need a separate helper. > > > + */ > > > + r = ceph_osdc_alloc_num_messages(req, GFP_NOFS, num_ops, 0); > > > + else > > > + r = ceph_osdc_alloc_messages(req, GFP_NOFS); > > > + if (r) { > > > + ceph_osdc_put_request(req); > > > + return ERR_PTR(r); > > > + } > > > + > > > + return req; > > > +} > > > diff --git a/fs/ceph/osdc.h b/fs/ceph/osdc.h > > > new file mode 100644 > > > index 000000000000..b03e5f9ef733 > > > --- /dev/null > > > +++ b/fs/ceph/osdc.h > > > @@ -0,0 +1,16 @@ > > > +#ifndef _FS_CEPH_OSDC_H > > > +#define _FS_CEPH_OSDC_H > > > + > > > +#include <linux/ceph/osd_client.h> > > > + > > > +struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, > > > + struct ceph_file_layout *layout, > > > + struct ceph_vino vino, > > > + u64 off, u64 *plen, > > > + unsigned int which, int num_ops, > > > + int opcode, int flags, > > > + struct ceph_snap_context *snapc, > > > + u32 truncate_seq, > > > + u64 truncate_size, > > > + bool use_mempool); > > > +#endif /* _FS_CEPH_OSDC_H */ > > > diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h > > > index 71b7610c3a3c..f59eb192c472 100644 > > > --- a/include/linux/ceph/osd_client.h > > > +++ b/include/linux/ceph/osd_client.h > > > @@ -486,16 +486,6 @@ int ceph_osdc_alloc_num_messages(struct ceph_osd_request *req, gfp_t gfp, > > > int num_reply_data_items); > > > int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); > > > > > > -extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, > > > - struct ceph_file_layout *layout, > > > - struct ceph_vino vino, > > > - u64 offset, u64 *len, > > > - unsigned int which, int num_ops, > > > - int opcode, int flags, > > > - struct ceph_snap_context *snapc, > > > - u32 truncate_seq, u64 truncate_size, > > > - bool use_mempool); > > > - > > > extern void ceph_osdc_get_request(struct ceph_osd_request *req); > > > extern void ceph_osdc_put_request(struct ceph_osd_request *req); > > > void ceph_osdc_init_request(struct ceph_osd_request *req, > > > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c > > > index 7be78fa6e2c3..5e54971bb7b2 100644 > > > --- a/net/ceph/osd_client.c > > > +++ b/net/ceph/osd_client.c > > > @@ -93,33 +93,6 @@ static inline void verify_osd_locked(struct ceph_osd *osd) { } > > > static inline void verify_lreq_locked(struct ceph_osd_linger_request *lreq) { } > > > #endif > > > > > > -/* > > > - * calculate the mapping of a file extent onto an object, and fill out the > > > - * request accordingly. shorten extent as necessary if it crosses an > > > - * object boundary. > > > - * > > > - * fill osd op in request message. > > > - */ > > > -static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, > > > - u64 *objnum, u64 *objoff, u64 *objlen) > > > -{ > > > - u64 orig_len = *plen; > > > - u32 xlen; > > > - > > > - /* object extent? */ > > > - ceph_calc_file_object_mapping(layout, off, orig_len, objnum, > > > - objoff, &xlen); > > > - *objlen = xlen; > > > - if (*objlen < orig_len) { > > > - *plen = *objlen; > > > - dout(" skipping last %llu, final file extent %llu~%llu\n", > > > - orig_len - *plen, off, *plen); > > > - } > > > - > > > - dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); > > > - return 0; > > > -} > > > - > > > static void ceph_osd_data_init(struct ceph_osd_data *osd_data) > > > { > > > memset(osd_data, 0, sizeof (*osd_data)); > > > @@ -1056,94 +1029,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, > > > return src->indata_len; > > > } > > > > > > -/* > > > - * build new request AND message, calculate layout, and adjust file > > > - * extent as needed. > > > - * > > > - * if the file was recently truncated, we include information about its > > > - * old and new size so that the object can be updated appropriately. (we > > > - * avoid synchronously deleting truncated objects because it's slow.) > > > - */ > > > -struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, > > > - struct ceph_file_layout *layout, > > > - struct ceph_vino vino, > > > - u64 off, u64 *plen, > > > - unsigned int which, int num_ops, > > > - int opcode, int flags, > > > - struct ceph_snap_context *snapc, > > > - u32 truncate_seq, > > > - u64 truncate_size, > > > - bool use_mempool) > > > -{ > > > - struct ceph_osd_request *req; > > > - u64 objnum = 0; > > > - u64 objoff = 0; > > > - u64 objlen = 0; > > > - int r; > > > - > > > - BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && > > > - opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && > > > - opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE); > > > - > > > - req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, > > > - GFP_NOFS); > > > - if (!req) { > > > - r = -ENOMEM; > > > - goto fail; > > > - } > > > - > > > - /* calculate max write size */ > > > - r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen); > > > - if (r) > > > - goto fail; > > > - > > > - if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { > > > - osd_req_op_init(req, which, opcode, 0); > > > - } else { > > > - u32 object_size = layout->object_size; > > > - u32 object_base = off - objoff; > > > - if (!(truncate_seq == 1 && truncate_size == -1ULL)) { > > > - if (truncate_size <= object_base) { > > > - truncate_size = 0; > > > - } else { > > > - truncate_size -= object_base; > > > - if (truncate_size > object_size) > > > - truncate_size = object_size; > > > - } > > > - } > > > - osd_req_op_extent_init(req, which, opcode, objoff, objlen, > > > - truncate_size, truncate_seq); > > > - } > > > - > > > - req->r_base_oloc.pool = layout->pool_id; > > > - req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); > > > - ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); > > > - req->r_flags = flags | osdc->client->options->read_from_replica; > > > - > > > - req->r_snapid = vino.snap; > > > - if (flags & CEPH_OSD_FLAG_WRITE) > > > - req->r_data_offset = off; > > > - > > > - if (num_ops > 1) > > > - /* > > > - * This is a special case for ceph_writepages_start(), but it > > > - * also covers ceph_uninline_data(). If more multi-op request > > > - * use cases emerge, we will need a separate helper. > > > - */ > > > - r = ceph_osdc_alloc_num_messages(req, GFP_NOFS, num_ops, 0); > > > - else > > > - r = ceph_osdc_alloc_messages(req, GFP_NOFS); > > > - if (r) > > > - goto fail; > > > - > > > - return req; > > > - > > > -fail: > > > - ceph_osdc_put_request(req); > > > - return ERR_PTR(r); > > > -} > > > -EXPORT_SYMBOL(ceph_osdc_new_request); > > > - > > > /* > > > * We keep osd requests in an rbtree, sorted by ->r_tid. > > > */ > > > > Do you have plans to refactor ceph_osdc_new_request() or change > > it significantly? > > > > I don't think the fact that it is used only by fs/ceph justifies > > moving it to fs/ceph, particularly given that you had to export a > > rather sensitive OSD client helper in the process. Also, you are > > creating a new file for it, which indicates that it doesn't fit > > into any of the existing files and reinforces my feeling that it > > probably doesn't belong in fs/ceph. > > > > Not today. I may need to refactor it a bit for the fscache work, but > that's still a ways out before it's ready. If you don't think this is > worth doing, we can just drop it for now. Yeah, let's drop this. If the new fscache needs something from the OSD client that it doesn't currently provide, let's get that written down and discuss before implementing. Thanks, Ilya