> @@ -916,6 +921,17 @@ void aio_complete(struct kiocb *iocb, long res, long res2) > struct io_event *ev_page, *event; > unsigned long flags; > unsigned tail, pos; > + int ret; > + > + ret = io_teardown_extensions(iocb); > + if (ret) { > + if (!res) > + res = ret; > + else if (!res2) > + res2 = ret; > + else > + pr_err("error %d tearing down aio extensions\n", ret); > + } This ends up trying to copy the kernel's io_extension copy back to userspace from interrupts, which obviously won't fly. And to what end? So that maybe someone can later add an 'extension' that can fill in some field that's then copied to userspace? But by copying the entire argument struct back? Let's not get ahead of ourselves. If they're going to try and give userspace some feedback after IO completion they're going to have to try a lot harder because they don't have acces to the submitting task context anymore. They'd have to pin some reference to a feedback mechanism in the in-flight io. I think we'd want that explicit in the iocb, not hiding off on the other side of this extension interface. I'd just remove this generic teardown callback path entirely. If there's PI state hanging off the iocb tear it down during iocb teardown. > +struct io_extension_type { > + unsigned int type; > + unsigned int extension_struct_size; > + int (*setup_fn)(struct kiocb *, int is_write); > + int (*destroy_fn)(struct kiocb *); > +}; I'd also get rid of all of this. More below. > +static int io_setup_extensions(struct kiocb *req, int is_write, > + struct io_extension __user *ioext) > +{ > + struct io_extension_type *iet; > + __u64 sz, has; > + int ret; > + > + /* Check size of buffer */ > + if (unlikely(copy_from_user(&sz, &ioext->ie_size, sizeof(sz)))) > + return -EFAULT; > + if (sz > PAGE_SIZE || > + sz > sizeof(struct io_extension) || > + sz < IO_EXT_SIZE(ie_has)) > + return -EINVAL; > + > + /* Check that the buffer's big enough */ > + if (unlikely(copy_from_user(&has, &ioext->ie_has, sizeof(has)))) > + return -EFAULT; > + ret = io_check_bufsize(has, sz); > + if (ret) > + return ret; > + > + /* Copy from userland */ > + req->ki_ioext = kzalloc(sizeof(struct kio_extension), GFP_NOIO); > + if (!req->ki_ioext) > + return -ENOMEM; > + > + req->ki_ioext->ke_user = ioext; > + if (unlikely(copy_from_user(&req->ki_ioext->ke_kern, ioext, sz))) { > + ret = -EFAULT; > + goto out; > + } (Isn't there some allocate-and-copy-from-userspace helper now? But..) I don't like the rudundancy of the implicit size requirement by a field's flag being set being duplicated by the explicit size argument. What does that give us, exactly? Our notion of the total size only seems to only matter if we're copying the entire struct from userspace and I'm don't think we need to do that. For each argument, we're translating it into some kernel equivalent, right? Fields in the iocb As each of these are initialized I'd just test the presence bits and __get_user() the userspace arguemnts directly, or copy_from_user() something slightly more complicated on to the stack. That gets rid of us having to care about the size at all. It stops us from allocating a kernel copy and pinning it for the duration of the IO. We'd just be sampling the present userspace arguments as we initialie the iocb during submission. > + /* Try to initialize all the extensions */ > + has = 0; > + for (iet = extensions; iet->type != IO_EXT_INVALID; iet++) { > + if (!(req->ki_ioext->ke_kern.ie_has & iet->type)) > + continue; > + ret = iet->setup_fn(req, is_write); > + if (ret) { > + req->ki_ioext->ke_kern.ie_has = has; > + goto out_destroy; > + } > + has |= iet->type; > + } So instead of doing all this we'd test explicit bits and act accordingly. If they're trivial translations between userspace fields and iocb fields we could just do it inline in this helper that'd be more like iocb_parse_more_args(iocb, struct __user *ptr). For more complicated stuff, like the PI page pinning, it could call out to PI. > + user_ext = (struct io_extension __user *)iocb->aio_extension_ptr; Need a __force there? Has this been run through sparse? - z -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html