Convert ceph_mds_request::r_pagelist to a databuf, along with the stuff that uses it such as setxattr ops. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> cc: Viacheslav Dubeyko <slava@xxxxxxxxxxx> cc: Alex Markuze <amarkuze@xxxxxxxxxx> cc: Ilya Dryomov <idryomov@xxxxxxxxx> cc: ceph-devel@xxxxxxxxxxxxxxx cc: linux-fsdevel@xxxxxxxxxxxxxxx --- fs/ceph/acl.c | 39 ++++++++++---------- fs/ceph/file.c | 12 ++++--- fs/ceph/inode.c | 85 +++++++++++++++++++------------------------- fs/ceph/mds_client.c | 11 +++--- fs/ceph/mds_client.h | 2 +- fs/ceph/super.h | 2 +- fs/ceph/xattr.c | 68 +++++++++++++++-------------------- 7 files changed, 96 insertions(+), 123 deletions(-) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 1564eacc253d..d6da650db83e 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -171,7 +171,7 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, { struct posix_acl *acl, *default_acl; size_t val_size1 = 0, val_size2 = 0; - struct ceph_pagelist *pagelist = NULL; + struct ceph_databuf *dbuf = NULL; void *tmp_buf = NULL; int err; @@ -201,58 +201,55 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL); if (!tmp_buf) goto out_err; - pagelist = ceph_pagelist_alloc(GFP_KERNEL); - if (!pagelist) + dbuf = ceph_databuf_req_alloc(1, PAGE_SIZE, GFP_KERNEL); + if (!dbuf) goto out_err; - err = ceph_pagelist_reserve(pagelist, PAGE_SIZE); - if (err) - goto out_err; - - ceph_pagelist_encode_32(pagelist, acl && default_acl ? 2 : 1); + ceph_databuf_encode_32(dbuf, acl && default_acl ? 2 : 1); if (acl) { size_t len = strlen(XATTR_NAME_POSIX_ACL_ACCESS); - err = ceph_pagelist_reserve(pagelist, len + val_size1 + 8); + err = ceph_databuf_reserve(dbuf, len + val_size1 + 8, + GFP_KERNEL); if (err) goto out_err; - ceph_pagelist_encode_string(pagelist, XATTR_NAME_POSIX_ACL_ACCESS, - len); + ceph_databuf_encode_string(dbuf, XATTR_NAME_POSIX_ACL_ACCESS, + len); err = posix_acl_to_xattr(&init_user_ns, acl, tmp_buf, val_size1); if (err < 0) goto out_err; - ceph_pagelist_encode_32(pagelist, val_size1); - ceph_pagelist_append(pagelist, tmp_buf, val_size1); + ceph_databuf_encode_32(dbuf, val_size1); + ceph_databuf_append(dbuf, tmp_buf, val_size1); } if (default_acl) { size_t len = strlen(XATTR_NAME_POSIX_ACL_DEFAULT); - err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8); + err = ceph_databuf_reserve(dbuf, len + val_size2 + 8, + GFP_KERNEL); if (err) goto out_err; - ceph_pagelist_encode_string(pagelist, - XATTR_NAME_POSIX_ACL_DEFAULT, len); + ceph_databuf_encode_string(dbuf, + XATTR_NAME_POSIX_ACL_DEFAULT, len); err = posix_acl_to_xattr(&init_user_ns, default_acl, tmp_buf, val_size2); if (err < 0) goto out_err; - ceph_pagelist_encode_32(pagelist, val_size2); - ceph_pagelist_append(pagelist, tmp_buf, val_size2); + ceph_databuf_encode_32(dbuf, val_size2); + ceph_databuf_append(dbuf, tmp_buf, val_size2); } kfree(tmp_buf); as_ctx->acl = acl; as_ctx->default_acl = default_acl; - as_ctx->pagelist = pagelist; + as_ctx->dbuf = dbuf; return 0; out_err: posix_acl_release(acl); posix_acl_release(default_acl); kfree(tmp_buf); - if (pagelist) - ceph_pagelist_release(pagelist); + ceph_databuf_release(dbuf); return err; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 851d70200c6b..9de2960748b9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -679,9 +679,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, iinfo.change_attr = 1; ceph_encode_timespec64(&iinfo.btime, &now); - if (req->r_pagelist) { - iinfo.xattr_len = req->r_pagelist->length; - iinfo.xattr_data = req->r_pagelist->mapped_tail; + if (req->r_dbuf) { + iinfo.xattr_len = ceph_databuf_len(req->r_dbuf); + iinfo.xattr_data = kmap_ceph_databuf_page(req->r_dbuf, 0); } else { /* fake it */ iinfo.xattr_len = ARRAY_SIZE(xattr_buf); @@ -731,6 +731,8 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, req->r_fmode, NULL); up_read(&mdsc->snap_rwsem); + if (req->r_dbuf) + kunmap_local(iinfo.xattr_data); if (ret) { doutc(cl, "failed to fill inode: %d\n", ret); ceph_dir_clear_complete(dir); @@ -849,8 +851,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, goto out_ctx; } /* Async create can't handle more than a page of xattrs */ - if (as_ctx.pagelist && - !list_is_singular(&as_ctx.pagelist->head)) + if (as_ctx.dbuf && + as_ctx.dbuf->nr_bvec > 1) try_async = false; } else if (!d_in_lookup(dentry)) { /* If it's not being looked up, it's negative */ diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index b060f765ad20..ec9b80fec7be 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -112,9 +112,9 @@ struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry, void ceph_as_ctx_to_req(struct ceph_mds_request *req, struct ceph_acl_sec_ctx *as_ctx) { - if (as_ctx->pagelist) { - req->r_pagelist = as_ctx->pagelist; - as_ctx->pagelist = NULL; + if (as_ctx->dbuf) { + req->r_dbuf = as_ctx->dbuf; + as_ctx->dbuf = NULL; } ceph_fscrypt_as_ctx_to_req(req, as_ctx); } @@ -2341,11 +2341,10 @@ static int fill_fscrypt_truncate(struct inode *inode, loff_t pos, orig_pos = round_down(attr->ia_size, CEPH_FSCRYPT_BLOCK_SIZE); u64 block = orig_pos >> CEPH_FSCRYPT_BLOCK_SHIFT; - struct ceph_pagelist *pagelist = NULL; - struct kvec iov = {0}; + struct ceph_databuf *dbuf = NULL; struct iov_iter iter; - struct page *page = NULL; - struct ceph_fscrypt_truncate_size_header header; + struct ceph_fscrypt_truncate_size_header *header; + void *p; int retry_op = 0; int len = CEPH_FSCRYPT_BLOCK_SIZE; loff_t i_size = i_size_read(inode); @@ -2372,37 +2371,35 @@ static int fill_fscrypt_truncate(struct inode *inode, goto out; } - page = __page_cache_alloc(GFP_KERNEL); - if (page == NULL) { - ret = -ENOMEM; + ret = -ENOMEM; + dbuf = ceph_databuf_req_alloc(2, 0, GFP_KERNEL); + if (!dbuf) goto out; - } - pagelist = ceph_pagelist_alloc(GFP_KERNEL); - if (!pagelist) { - ret = -ENOMEM; + if (ceph_databuf_insert_frag(dbuf, 0, sizeof(*header), GFP_KERNEL) < 0) + goto out; + if (ceph_databuf_insert_frag(dbuf, 1, PAGE_SIZE, GFP_KERNEL) < 0) goto out; - } - iov.iov_base = kmap_local_page(page); - iov.iov_len = len; - iov_iter_kvec(&iter, READ, &iov, 1, len); + iov_iter_bvec(&iter, ITER_DEST, &dbuf->bvec[1], 1, len); pos = orig_pos; ret = __ceph_sync_read(inode, &pos, &iter, &retry_op, &objver); if (ret < 0) goto out; + header = kmap_ceph_databuf_page(dbuf, 0); + /* Insert the header first */ - header.ver = 1; - header.compat = 1; - header.change_attr = cpu_to_le64(inode_peek_iversion_raw(inode)); + header->ver = 1; + header->compat = 1; + header->change_attr = cpu_to_le64(inode_peek_iversion_raw(inode)); /* * Always set the block_size to CEPH_FSCRYPT_BLOCK_SIZE, * because in MDS it may need this to do the truncate. */ - header.block_size = cpu_to_le32(CEPH_FSCRYPT_BLOCK_SIZE); + header->block_size = cpu_to_le32(CEPH_FSCRYPT_BLOCK_SIZE); /* * If we hit a hole here, we should just skip filling @@ -2417,51 +2414,41 @@ static int fill_fscrypt_truncate(struct inode *inode, if (!objver) { doutc(cl, "hit hole, ppos %lld < size %lld\n", pos, i_size); - header.data_len = cpu_to_le32(8 + 8 + 4); - header.file_offset = 0; + header->data_len = cpu_to_le32(8 + 8 + 4); + header->file_offset = 0; ret = 0; } else { - header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE); - header.file_offset = cpu_to_le64(orig_pos); + header->data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE); + header->file_offset = cpu_to_le64(orig_pos); doutc(cl, "encrypt block boff/bsize %d/%lu\n", boff, CEPH_FSCRYPT_BLOCK_SIZE); /* truncate and zero out the extra contents for the last block */ - memset(iov.iov_base + boff, 0, PAGE_SIZE - boff); + p = kmap_ceph_databuf_page(dbuf, 1); + memset(p + boff, 0, PAGE_SIZE - boff); + kunmap_local(p); /* encrypt the last block */ - ret = ceph_fscrypt_encrypt_block_inplace(inode, page, - CEPH_FSCRYPT_BLOCK_SIZE, - 0, block, - GFP_KERNEL); + ret = ceph_fscrypt_encrypt_block_inplace( + inode, ceph_databuf_page(dbuf, 1), + CEPH_FSCRYPT_BLOCK_SIZE, 0, block, GFP_KERNEL); if (ret) goto out; } - /* Insert the header */ - ret = ceph_pagelist_append(pagelist, &header, sizeof(header)); - if (ret) - goto out; + ceph_databuf_added_data(dbuf, sizeof(*header)); + if (header->block_size) + ceph_databuf_added_data(dbuf, CEPH_FSCRYPT_BLOCK_SIZE); - if (header.block_size) { - /* Append the last block contents to pagelist */ - ret = ceph_pagelist_append(pagelist, iov.iov_base, - CEPH_FSCRYPT_BLOCK_SIZE); - if (ret) - goto out; - } - req->r_pagelist = pagelist; + req->r_dbuf = dbuf; out: doutc(cl, "%p %llx.%llx size dropping cap refs on %s\n", inode, ceph_vinop(inode), ceph_cap_string(got)); ceph_put_cap_refs(ci, got); - if (iov.iov_base) - kunmap_local(iov.iov_base); - if (page) - __free_pages(page, 0); - if (ret && pagelist) - ceph_pagelist_release(pagelist); + kunmap_local(header); + if (ret) + ceph_databuf_release(dbuf); return ret; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 230e0c3f341f..09661a34f287 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1125,8 +1125,7 @@ void ceph_mdsc_release_request(struct kref *kref) put_cred(req->r_cred); if (req->r_mnt_idmap) mnt_idmap_put(req->r_mnt_idmap); - if (req->r_pagelist) - ceph_pagelist_release(req->r_pagelist); + ceph_databuf_release(req->r_dbuf); kfree(req->r_fscrypt_auth); kfree(req->r_altname); put_request_session(req); @@ -3207,10 +3206,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - if (req->r_pagelist) { - struct ceph_pagelist *pagelist = req->r_pagelist; - ceph_msg_data_add_pagelist(msg, pagelist); - msg->hdr.data_len = cpu_to_le32(pagelist->length); + if (req->r_dbuf) { + struct ceph_databuf *dbuf = req->r_dbuf; + ceph_msg_data_add_databuf(msg, dbuf); + msg->hdr.data_len = cpu_to_le32(ceph_databuf_len(dbuf)); } else { msg->hdr.data_len = 0; } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 3e2a6fa7c19a..a7ee8da07ce7 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -333,7 +333,7 @@ struct ceph_mds_request { u32 r_direct_hash; /* choose dir frag based on this dentry hash */ /* data payload is used for xattr ops */ - struct ceph_pagelist *r_pagelist; + struct ceph_databuf *r_dbuf; /* what caps shall we drop? */ int r_inode_drop, r_inode_unless; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index bb0db0cc8003..984a6d2a5378 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1137,7 +1137,7 @@ struct ceph_acl_sec_ctx { #ifdef CONFIG_FS_ENCRYPTION struct ceph_fscrypt_auth *fscrypt_auth; #endif - struct ceph_pagelist *pagelist; + struct ceph_databuf *dbuf; }; #ifdef CONFIG_SECURITY diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 537165db4519..b083cd3b3974 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1114,17 +1114,17 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, struct ceph_mds_request *req; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_osd_client *osdc = &fsc->client->osdc; - struct ceph_pagelist *pagelist = NULL; + struct ceph_databuf *dbuf = NULL; int op = CEPH_MDS_OP_SETXATTR; int err; if (size > 0) { - /* copy value into pagelist */ - pagelist = ceph_pagelist_alloc(GFP_NOFS); - if (!pagelist) + /* copy value into dbuf */ + dbuf = ceph_databuf_req_alloc(1, size, GFP_NOFS); + if (!dbuf) return -ENOMEM; - err = ceph_pagelist_append(pagelist, value, size); + err = ceph_databuf_append(dbuf, value, size); if (err) goto out; } else if (!value) { @@ -1154,8 +1154,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, req->r_args.setxattr.flags = cpu_to_le32(flags); req->r_args.setxattr.osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch); - req->r_pagelist = pagelist; - pagelist = NULL; + req->r_dbuf = dbuf; + dbuf = NULL; } req->r_inode = inode; @@ -1169,8 +1169,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, doutc(cl, "xattr.ver (after): %lld\n", ci->i_xattrs.version); out: - if (pagelist) - ceph_pagelist_release(pagelist); + ceph_databuf_release(dbuf); return err; } @@ -1377,7 +1376,7 @@ bool ceph_security_xattr_deadlock(struct inode *in) int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, struct ceph_acl_sec_ctx *as_ctx) { - struct ceph_pagelist *pagelist = as_ctx->pagelist; + struct ceph_databuf *dbuf = as_ctx->dbuf; const char *name; size_t name_len; int err; @@ -1391,14 +1390,11 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, } err = -ENOMEM; - if (!pagelist) { - pagelist = ceph_pagelist_alloc(GFP_KERNEL); - if (!pagelist) + if (!dbuf) { + dbuf = ceph_databuf_req_alloc(0, PAGE_SIZE, GFP_KERNEL); + if (!dbuf) goto out; - err = ceph_pagelist_reserve(pagelist, PAGE_SIZE); - if (err) - goto out; - ceph_pagelist_encode_32(pagelist, 1); + ceph_databuf_encode_32(dbuf, 1); } /* @@ -1407,38 +1403,31 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, * dentry_init_security hook. */ name_len = strlen(name); - err = ceph_pagelist_reserve(pagelist, - 4 * 2 + name_len + as_ctx->lsmctx.len); + err = ceph_databuf_reserve(dbuf, 4 * 2 + name_len + as_ctx->lsmctx.len, + GFP_KERNEL); if (err) goto out; - if (as_ctx->pagelist) { + if (as_ctx->dbuf) { /* update count of KV pairs */ - BUG_ON(pagelist->length <= sizeof(__le32)); - if (list_is_singular(&pagelist->head)) { - le32_add_cpu((__le32*)pagelist->mapped_tail, 1); - } else { - struct page *page = list_first_entry(&pagelist->head, - struct page, lru); - void *addr = kmap_atomic(page); - le32_add_cpu((__le32*)addr, 1); - kunmap_atomic(addr); - } + BUG_ON(ceph_databuf_len(dbuf) <= sizeof(__le32)); + __le32 *addr = kmap_ceph_databuf_page(dbuf, 0); + le32_add_cpu(addr, 1); + kunmap_local(addr); } else { - as_ctx->pagelist = pagelist; + as_ctx->dbuf = dbuf; } - ceph_pagelist_encode_32(pagelist, name_len); - ceph_pagelist_append(pagelist, name, name_len); + ceph_databuf_encode_32(dbuf, name_len); + ceph_databuf_append(dbuf, name, name_len); - ceph_pagelist_encode_32(pagelist, as_ctx->lsmctx.len); - ceph_pagelist_append(pagelist, as_ctx->lsmctx.context, - as_ctx->lsmctx.len); + ceph_databuf_encode_32(dbuf, as_ctx->lsmctx.len); + ceph_databuf_append(dbuf, as_ctx->lsmctx.context, as_ctx->lsmctx.len); err = 0; out: - if (pagelist && !as_ctx->pagelist) - ceph_pagelist_release(pagelist); + if (dbuf && !as_ctx->dbuf) + ceph_databuf_release(dbuf); return err; } #endif /* CONFIG_CEPH_FS_SECURITY_LABEL */ @@ -1456,8 +1445,7 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx) #ifdef CONFIG_FS_ENCRYPTION kfree(as_ctx->fscrypt_auth); #endif - if (as_ctx->pagelist) - ceph_pagelist_release(as_ctx->pagelist); + ceph_databuf_release(as_ctx->dbuf); } /*