Happy Thanksgiving everyone... I wish I only used UID 0 when necessary. I changed my code like this: - if ((type == ORANGEFS_IO_WRITE) && open_for_write) - new_op->upcall.uid = 0; - if ((type == ORANGEFS_IO_READ) && open_for_read) - new_op->upcall.uid = 0; + if ((type == ORANGEFS_IO_WRITE) && open_for_write) { + if (inode_permission(inode, MAY_WRITE)) + new_op->upcall.uid = 0; + } + if ((type == ORANGEFS_IO_READ) && open_for_read) { + if (inode_permission(inode, MAY_READ)) + new_op->upcall.uid = 0; + } At first this seems to work, then, whatever process is using my filesystem wedges up... nothing in dmesg, nothing in the orangefs userspace logs... vim on a /pvfsmnt/file does it every time, open a new line and the whole terminal's locked up. If you strace the vim pid, that locks up :-) ... I've watched ftrace output while this is happening, inode_permission sure does call orangefs_permission a whole bunch of times. Anywho... I think inode_permission will return 0 when I can read (or write) and non-zero when I can't. I'll be trying to figure out what is happening, if anyone here sees right off how I might be mis-using inode_permission, please let me know... -Mike On Tue, Nov 26, 2019 at 1:50 PM <hubcap@xxxxxxxxxx> wrote: > > From: Mike Marshall <hubcap@xxxxxxxxxxxx> > > Here's another version that is hopefully closer to > usable... > > Orangefs has no open, and orangefs checks file permissions > on each file access. Posix requires that file permissions > be checked on open and nowhere else. Orangefs-through-the-kernel > needs to seem posix compliant. > > The VFS opens files, even if the filesystem provides no > method. We can see if a file was successfully opened for > read and or for write by looking at file->f_mode. > > When writes are flowing from the page cache, file is no > longer available. We can trust the VFS to have checked > file->f_mode before writing to the page cache. > > The mode of a file might change between when it is opened > and IO commences, or it might be created with an arbitrary mode. > > We'll make sure we don't hit EACCES during the IO stage by > using UID 0. Some of the time we have access without changing > to UID 0 - how to check? > > Signed-off-by: Mike Marshall <hubcap@xxxxxxxxxxxx> > --- > fs/orangefs/file.c | 39 +++++++++++++++++++++++++++++++++-- > fs/orangefs/inode.c | 8 +++---- > fs/orangefs/orangefs-kernel.h | 3 ++- > 3 files changed, 43 insertions(+), 7 deletions(-) > > diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c > index a5612abc0936..c740159d9ad1 100644 > --- a/fs/orangefs/file.c > +++ b/fs/orangefs/file.c > @@ -46,8 +46,9 @@ static int flush_racache(struct inode *inode) > * Post and wait for the I/O upcall to finish > */ > ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, > - loff_t *offset, struct iov_iter *iter, size_t total_size, > - loff_t readahead_size, struct orangefs_write_range *wr, int *index_return) > + loff_t *offset, struct iov_iter *iter, size_t total_size, > + loff_t readahead_size, struct orangefs_write_range *wr, > + int *index_return, struct file *file) > { > struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); > struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; > @@ -55,6 +56,8 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, > int buffer_index; > ssize_t ret; > size_t copy_amount; > + int open_for_read; > + int open_for_write; > > new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); > if (!new_op) > @@ -90,6 +93,38 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, > new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid); > new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid); > } > + /* > + * Orangefs has no open, and orangefs checks file permissions > + * on each file access. Posix requires that file permissions > + * be checked on open and nowhere else. Orangefs-through-the-kernel > + * needs to seem posix compliant. > + * > + * The VFS opens files, even if the filesystem provides no > + * method. We can see if a file was successfully opened for > + * read and or for write by looking at file->f_mode. > + * > + * When writes are flowing from the page cache, file is no > + * longer available. We can trust the VFS to have checked > + * file->f_mode before writing to the page cache. > + * > + * The mode of a file might change between when it is opened > + * and IO commences, or it might be created with an arbitrary mode. > + * > + * We'll make sure we don't hit EACCES during the IO stage by > + * using UID 0. Some of the time we have access without changing > + * to UID 0 - how to check? > + */ > + if (file) { > + open_for_write = file->f_mode & FMODE_WRITE; > + open_for_read = file->f_mode & FMODE_READ; > + } else { > + open_for_write = 1; > + open_for_read = 0; /* not relevant? */ > + } > + if ((type == ORANGEFS_IO_WRITE) && open_for_write) > + new_op->upcall.uid = 0; > + if ((type == ORANGEFS_IO_READ) && open_for_read) > + new_op->upcall.uid = 0; > > gossip_debug(GOSSIP_FILE_DEBUG, > "%s(%pU): offset: %llu total_size: %zd\n", > diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c > index efb12197da18..961c0fd8675a 100644 > --- a/fs/orangefs/inode.c > +++ b/fs/orangefs/inode.c > @@ -55,7 +55,7 @@ static int orangefs_writepage_locked(struct page *page, > iov_iter_bvec(&iter, WRITE, &bv, 1, wlen); > > ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen, > - len, wr, NULL); > + len, wr, NULL, NULL); > if (ret < 0) { > SetPageError(page); > mapping_set_error(page->mapping, ret); > @@ -126,7 +126,7 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow, > wr.uid = ow->uid; > wr.gid = ow->gid; > ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len, > - 0, &wr, NULL); > + 0, &wr, NULL, NULL); > if (ret < 0) { > for (i = 0; i < ow->npages; i++) { > SetPageError(ow->pages[i]); > @@ -311,7 +311,7 @@ static int orangefs_readpage(struct file *file, struct page *page) > iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE); > > ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter, > - read_size, inode->i_size, NULL, &buffer_index); > + read_size, inode->i_size, NULL, &buffer_index, file); > remaining = ret; > /* this will only zero remaining unread portions of the page data */ > iov_iter_zero(~0U, &iter); > @@ -651,7 +651,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb, > (int)*offset); > > ret = wait_for_direct_io(type, inode, offset, iter, > - each_count, 0, NULL, NULL); > + each_count, 0, NULL, NULL, file); > gossip_debug(GOSSIP_FILE_DEBUG, > "%s(%pU): return from wait_for_io:%d\n", > __func__, > diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h > index 34a6c99fa29b..ed67f39fa7ce 100644 > --- a/fs/orangefs/orangefs-kernel.h > +++ b/fs/orangefs/orangefs-kernel.h > @@ -398,7 +398,8 @@ bool __is_daemon_in_service(void); > */ > int orangefs_revalidate_mapping(struct inode *); > ssize_t wait_for_direct_io(enum ORANGEFS_io_type, struct inode *, loff_t *, > - struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *); > + struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *, > + struct file *); > ssize_t do_readv_writev(enum ORANGEFS_io_type, struct file *, loff_t *, > struct iov_iter *); > > -- > 2.20.1 >