> On Feb 10, 2016, at 19:24, Ilya Dryomov <idryomov@xxxxxxxxx> wrote: > > Turn r_ops into a flexible array member to enable large, consisting of > up to 16 ops, OSD requests. The use case is scattered writeback in > cephfs and, as far as the kernel client is concerned, 16 is just a made > up number. > > r_ops had size 3 for copyup+hint+write, but copyup is really a special > case - it can only happen once. ceph_osd_request_cache is therefore > stuffed with num_ops=2 requests, anything bigger than that is allocated > with kmalloc(). req_mempool is backed by ceph_osd_request_cache, which > means either num_ops=1 or num_ops=2 for use_mempool=true - all existing > users (ceph_writepages_start(), ceph_osdc_writepages()) are fine with > that. > > Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx> > --- > drivers/block/rbd.c | 2 -- > include/linux/ceph/osd_client.h | 6 ++++-- > net/ceph/osd_client.c | 32 +++++++++++++++++++------------- > 3 files changed, 23 insertions(+), 17 deletions(-) > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index 94f31bde73e8..08018710f363 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -1847,8 +1847,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, > if (osd_req->r_result < 0) > obj_request->result = osd_req->r_result; > > - rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP); > - > /* > * We support a 64-bit length, but ultimately it has to be > * passed to the block layer, which just supports a 32-bit > diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h > index c6d1d603bacf..aada6a1383a4 100644 > --- a/include/linux/ceph/osd_client.h > +++ b/include/linux/ceph/osd_client.h > @@ -43,7 +43,8 @@ struct ceph_osd { > }; > > > -#define CEPH_OSD_MAX_OP 3 > +#define CEPH_OSD_SLAB_OPS 2 > +#define CEPH_OSD_MAX_OPS 16 > > enum ceph_osd_data_type { > CEPH_OSD_DATA_TYPE_NONE = 0, > @@ -139,7 +140,6 @@ struct ceph_osd_request { > > /* request osd ops array */ > unsigned int r_num_ops; > - struct ceph_osd_req_op r_ops[CEPH_OSD_MAX_OP]; > > /* these are updated on each send */ > __le32 *r_request_osdmap_epoch; > @@ -175,6 +175,8 @@ struct ceph_osd_request { > unsigned long r_stamp; /* send OR check time */ > > struct ceph_snap_context *r_snapc; /* snap context for writes */ > + > + struct ceph_osd_req_op r_ops[]; > }; > > struct ceph_request_redirect { > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c > index 8bf4f74064e5..37a0fc5273d1 100644 > --- a/net/ceph/osd_client.c > +++ b/net/ceph/osd_client.c > @@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref) > ceph_put_snap_context(req->r_snapc); > if (req->r_mempool) > mempool_free(req, req->r_osdc->req_mempool); > - else > + else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS) > kmem_cache_free(ceph_osd_request_cache, req); > - > + else > + kfree(req); > } > > void ceph_osdc_get_request(struct ceph_osd_request *req) > @@ -369,9 +370,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, > struct ceph_msg *msg; > size_t msg_size; > > - BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX); > - BUG_ON(num_ops > CEPH_OSD_MAX_OP); > - > msg_size = 4 + 4 + 8 + 8 + 4+8; > msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ > msg_size += 1 + 8 + 4 + 4; /* pg_t */ > @@ -383,14 +381,21 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, > msg_size += 4; > > if (use_mempool) { > + BUG_ON(num_ops > CEPH_OSD_SLAB_OPS); > req = mempool_alloc(osdc->req_mempool, gfp_flags); > - memset(req, 0, sizeof(*req)); > + } else if (num_ops <= CEPH_OSD_SLAB_OPS) { > + req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags); > } else { > - req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags); > + BUG_ON(num_ops > CEPH_OSD_MAX_OPS); > + req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]), > + gfp_flags); > } > - if (req == NULL) > + if (unlikely(!req)) > return NULL; > > + /* req only, each op is zeroed in _osd_req_op_init() */ > + memset(req, 0, sizeof(*req)); > + > req->r_osdc = osdc; > req->r_mempool = use_mempool; > req->r_num_ops = num_ops; > @@ -1809,7 +1814,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) > > ceph_decode_need(&p, end, 4, bad_put); > numops = ceph_decode_32(&p); > - if (numops > CEPH_OSD_MAX_OP) > + if (numops > CEPH_OSD_MAX_OPS) > goto bad_put; > if (numops != req->r_num_ops) > goto bad_put; > @@ -2773,11 +2778,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages); > > int ceph_osdc_setup(void) > { > + size_t size = sizeof(struct ceph_osd_request) + > + CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); > + > BUG_ON(ceph_osd_request_cache); > - ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", > - sizeof (struct ceph_osd_request), > - __alignof__(struct ceph_osd_request), > - 0, NULL); > + ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size, > + 0, 0, NULL); > > return ceph_osd_request_cache ? 0 : -ENOMEM; > } This approach is better than mine. But the code that calculates r_request/r_reply messages size get lost in your patch Regards Yan, Zheng > -- > 2.4.3 > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html