On Wed, Jun 02, 2021 at 12:48:45PM +0900, Namjae Jeon wrote: > +#include <linux/rwlock.h> > + > +#include "glob.h" > +#include "buffer_pool.h" > +#include "connection.h" > +#include "mgmt/ksmbd_ida.h" > + > +static struct kmem_cache *filp_cache; > + > +struct wm { > + struct list_head list; > + unsigned int sz; > + char buffer[0]; This should use buffer[]; > +}; > + > +struct wm_list { > + struct list_head list; > + unsigned int sz; > + > + spinlock_t wm_lock; > + int avail_wm; > + struct list_head idle_wm; > + wait_queue_head_t wm_wait; > +}; What does wm stand for? This looks like arbitrary caching of vmalloc buffers. I thought we decided to just make vmalloc suck less rather than papering over that? > +static LIST_HEAD(wm_lists); > +static DEFINE_RWLOCK(wm_lists_lock); Especially as this isn't going to scale at all using global loists and locks. > +void ksmbd_free_file_struct(void *filp) > +{ > + kmem_cache_free(filp_cache, filp); > +} > + > +void *ksmbd_alloc_file_struct(void) > +{ > + return kmem_cache_zalloc(filp_cache, GFP_KERNEL); > +} These are only ued in vfs_cache.c . So I'd suggest to just move filp_cache there and drop these wrappers. > +} > + > +static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work, > + struct inode *parent_inode, > + struct inode *inode) > +{ > + if (!test_share_config_flag(work->tcon->share_conf, > + KSMBD_SHARE_FLAG_INHERIT_OWNER)) > + return; > + > + i_uid_write(inode, i_uid_read(parent_inode)); > +} Can you explain this a little more? When do the normal create/mdir fail to inherit the owner? int ksmbd_vfs_inode_permission(struct dentry *dentry, int acc_mode, bool delete) > +{ > + int mask, ret = 0; > + > + mask = 0; > + acc_mode &= O_ACCMODE; > + > + if (acc_mode == O_RDONLY) > + mask = MAY_READ; > + else if (acc_mode == O_WRONLY) > + mask = MAY_WRITE; > + else if (acc_mode == O_RDWR) > + mask = MAY_READ | MAY_WRITE; How about already setting up the MAY_ flags in smb2_create_open_flags and returning them in extra argument? That keeps the sm to Linux translation in a single place. > + > + if (inode_permission(&init_user_ns, d_inode(dentry), mask | MAY_OPEN)) > + return -EACCES; And this call can be open coded in the only caller. > + if (delete) { And this block could be split into a nice self-contained helper. > + struct dentry *child, *parent; > + > + parent = dget_parent(dentry); > + inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); > + child = lookup_one_len(dentry->d_name.name, parent, > + dentry->d_name.len); > + if (IS_ERR(child)) { > + ret = PTR_ERR(child); > + goto out_lock; > + } > + > + if (child != dentry) { > + ret = -ESTALE; > + dput(child); > + goto out_lock; > + } > + dput(child); That being said I do not understand the need for this re-lookup at all. > + if (!inode_permission(&init_user_ns, d_inode(dentry), MAY_OPEN | MAY_WRITE)) All these inode_permission lines have overly long lines. It might be worth to pass the user_namespace to this function, not only to shorten the code, but also to prepare for user namespace support. > + parent = dget_parent(dentry); > + inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); > + child = lookup_one_len(dentry->d_name.name, parent, > + dentry->d_name.len); > + if (IS_ERR(child)) { > + ret = PTR_ERR(child); > + goto out_lock; > + } > + > + if (child != dentry) { > + ret = -ESTALE; > + dput(child); > + goto out_lock; > + } > + dput(child); This is the same weird re-lookup dance as above. IFF there is a good rationale for it it needs to go into a self-contained and well documented helper. > +int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) > +{ > + struct path path; > + struct dentry *dentry; > + int err; > + > + dentry = kern_path_create(AT_FDCWD, name, &path, 0); Curious: why is this using absolute or CWD based path instead of doing lookups based off the parent as done by e.g. nfsd? Same also for mkdir and co. > +{ > + struct file *filp; > + ssize_t nbytes = 0; > + char *rbuf; > + struct inode *inode; > + > + rbuf = work->aux_payload_buf; > + filp = fp->filp; > + inode = file_inode(filp); These can be initialized on the declaration lines to make the code a little easier to read. > + if (!work->tcon->posix_extensions) { > + spin_lock(&src_dent->d_lock); > + list_for_each_entry(dst_dent, &src_dent->d_subdirs, d_child) { > + struct ksmbd_file *child_fp; > + > + if (d_really_is_negative(dst_dent)) > + continue; > + > + child_fp = ksmbd_lookup_fd_inode(d_inode(dst_dent)); > + if (child_fp) { > + spin_unlock(&src_dent->d_lock); > + ksmbd_debug(VFS, "Forbid rename, sub file/dir is in use\n"); > + return -EACCES; > + } > + } > + spin_unlock(&src_dent->d_lock); > + } This begs for being split into a self-contained helper. > +int ksmbd_vfs_lock(struct file *filp, int cmd, struct file_lock *flock) > +{ > + ksmbd_debug(VFS, "calling vfs_lock_file\n"); > + return vfs_lock_file(filp, cmd, flock, NULL); > +} > + > +int ksmbd_vfs_readdir(struct file *file, struct ksmbd_readdir_data *rdata) > +{ > + return iterate_dir(file, &rdata->ctx); > +} > + > +int ksmbd_vfs_alloc_size(struct ksmbd_work *work, struct ksmbd_file *fp, > + loff_t len) > +{ > + smb_break_all_levII_oplock(work, fp, 1); > + return vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0, len); > +} Please avoid such trivial wrappers that just make the code hard to follow. > +int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, > + struct file_allocated_range_buffer *ranges, > + int in_count, int *out_count) What is fqar? > + > +/* > + * ksmbd_vfs_get_logical_sector_size() - get logical sector size from inode > + * @inode: inode > + * > + * Return: logical sector size > + */ > +unsigned short ksmbd_vfs_logical_sector_size(struct inode *inode) > +{ > + struct request_queue *q; > + unsigned short ret_val = 512; > + > + if (!inode->i_sb->s_bdev) > + return ret_val; > + > + q = inode->i_sb->s_bdev->bd_disk->queue; > + > + if (q && q->limits.logical_block_size) > + ret_val = q->limits.logical_block_size; > + > + return ret_val; I don't think a CIFS server has any business poking into the block layer. What is this trying to do? > +struct posix_acl *ksmbd_vfs_posix_acl_alloc(int count, gfp_t flags) > +{ > +#if IS_ENABLED(CONFIG_FS_POSIX_ACL) > + return posix_acl_alloc(count, flags); > +#else > + return NULL; > +#endif > +} > + > +struct posix_acl *ksmbd_vfs_get_acl(struct inode *inode, int type) > +{ > +#if IS_ENABLED(CONFIG_FS_POSIX_ACL) > + return get_acl(inode, type); > +#else > + return NULL; > +#endif > +} > + > +int ksmbd_vfs_set_posix_acl(struct inode *inode, int type, > + struct posix_acl *acl) > +{ > +#if IS_ENABLED(CONFIG_FS_POSIX_ACL) > + return set_posix_acl(&init_user_ns, inode, type, acl); > +#else > + return -EOPNOTSUPP; > +#endif > +} Please avoid these pointless wrappers and just use large code block ifdefs or IS_ENABLED checks. > +int ksmbd_vfs_copy_file_range(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, size_t len) > +{ > + struct inode *inode_in = file_inode(file_in); > + struct inode *inode_out = file_inode(file_out); > + int ret; > + > + ret = vfs_copy_file_range(file_in, pos_in, file_out, pos_out, len, 0); > + /* do splice for the copy between different file systems */ > + if (ret != -EXDEV) > + return ret; > + > + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) > + return -EISDIR; > + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) > + return -EINVAL; > + > + if (!(file_in->f_mode & FMODE_READ) || > + !(file_out->f_mode & FMODE_WRITE)) > + return -EBADF; > + > + if (len == 0) > + return 0; > + > + file_start_write(file_out); > + > + /* > + * skip the verification of the range of data. it will be done > + * in do_splice_direct > + */ > + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, > + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); vfs_copy_file_range already does this type of fallback, so this is dead code. > +#define XATTR_NAME_STREAM_LEN (sizeof(XATTR_NAME_STREAM) - 1) > + > +enum { > + XATTR_DOSINFO_ATTRIB = 0x00000001, > + XATTR_DOSINFO_EA_SIZE = 0x00000002, > + XATTR_DOSINFO_SIZE = 0x00000004, > + XATTR_DOSINFO_ALLOC_SIZE = 0x00000008, > + XATTR_DOSINFO_CREATE_TIME = 0x00000010, > + XATTR_DOSINFO_CHANGE_TIME = 0x00000020, > + XATTR_DOSINFO_ITIME = 0x00000040 > +}; > + > +struct xattr_dos_attrib { > + __u16 version; > + __u32 flags; > + __u32 attr; > + __u32 ea_size; > + __u64 size; > + __u64 alloc_size; > + __u64 create_time; > + __u64 change_time; > + __u64 itime; > +}; These looks like on-disk structures. Any chance you could re-order the headers so that things like on-disk, on the wire and netlink uapi structures all have a dedicated and well documented header for themselves? > + read_lock(&ci->m_lock); > + list_for_each(cur, &ci->m_fp_list) { > + lfp = list_entry(cur, struct ksmbd_file, node); Please use list_for_each_entry. There are very few places left where using list_for_each makes sense. > + if (inode == FP_INODE(lfp)) { > + atomic_dec(&ci->m_count); > + read_unlock(&ci->m_lock); > + return lfp; > + } > + } > + atomic_dec(&ci->m_count); > + read_unlock(&ci->m_lock); So a successful find increments m_count, but a miss decreases it? Isn't this going to create an underflow? > + if (!fp->f_ci) { > + ksmbd_free_file_struct(fp); > + return ERR_PTR(-ENOMEM); > + } > + > + ret = __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); > + if (ret) { > + ksmbd_inode_put(fp->f_ci); > + ksmbd_free_file_struct(fp); > + return ERR_PTR(ret); > + } > + > + atomic_inc(&work->conn->stats.open_files_count); > + return fp; Please use goto based unwinding instead of duplicating the resoure cleanup. > +static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp) Overly long line. > +{ > + return fp->tcon != tcon; > +} > + > +static bool session_fd_check(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp) Same.