On Thu, Aug 27, 2020 at 07:49:45AM -0600, Jens Axboe wrote:
> On 8/27/20 7:40 AM, Stefano Garzarella wrote:
> > @@ -6414,6 +6425,19 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
> >  	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
> >  		return -EINVAL;
> >  
> > +	if (unlikely(ctx->restricted)) {
> > +		if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
> > +			return -EACCES;
> > +
> > +		if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
> > +		    ctx->restrictions.sqe_flags_required)
> > +			return -EACCES;
> > +
> > +		if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
> > +				  ctx->restrictions.sqe_flags_required))
> > +			return -EACCES;
> > +	}
> > +
> 
> This should be a separate function, ala:
> 
> 	if (unlikely(ctx->restricted)) {
> 		ret = io_check_restriction(ctx, req);
> 		if (ret)
> 			return ret;
> 	}
> 
> to move it totally out of the (very) hot path.

I'll fix.

> >  	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
> >  	    !io_op_defs[req->opcode].buffer_select)
> >  		return -EOPNOTSUPP;
> > @@ -8714,6 +8738,71 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
> >  	return -EINVAL;
> >  }
> >  
> > +static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
> > +				    unsigned int nr_args)
> > +{
> > +	struct io_uring_restriction *res;
> > +	size_t size;
> > +	int i, ret;
> > +
> > +	/* We allow only a single restrictions registration */
> > +	if (ctx->restricted)
> > +		return -EBUSY;
> > +
> > +	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
> > +		return -EINVAL;
> > +
> > +	size = array_size(nr_args, sizeof(*res));
> > +	if (size == SIZE_MAX)
> > +		return -EOVERFLOW;
> > +
> > +	res = memdup_user(arg, size);
> > +	if (IS_ERR(res))
> > +		return PTR_ERR(res);
> > +
> > +	for (i = 0; i < nr_args; i++) {
> > +		switch (res[i].opcode) {
> > +		case IORING_RESTRICTION_REGISTER_OP:
> > +			if (res[i].register_op >= IORING_REGISTER_LAST) {
> > +				ret = -EINVAL;
> > +				goto out;
> > +			}
> > +
> > +			__set_bit(res[i].register_op,
> > +				  ctx->restrictions.register_op);
> > +			break;
> > +		case IORING_RESTRICTION_SQE_OP:
> > +			if (res[i].sqe_op >= IORING_OP_LAST) {
> > +				ret = -EINVAL;
> > +				goto out;
> > +			}
> > +
> > +			__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
> > +			break;
> > +		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
> > +			ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
> > +			break;
> > +		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
> > +			ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
> > +			break;
> > +		default:
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +	}
> > +
> > +	ctx->restricted = 1;
> > +
> > +	ret = 0;
> 
> I'd set ret = 0 above the switch, that's the usual idiom - start at
> zero, have someone set it to -ERROR if something fails.

Yes, it is better. I'll fix it.

Thanks,
Stefano
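
For reference, a minimal sketch of the helper Jens suggests, built only from the hunk quoted above. The name io_check_restriction comes from his snippet; passing sqe_flags as an extra parameter is an assumption of this sketch, since the quoted checks use the sqe_flags value already computed in io_init_req(), and the final patch may structure this differently.

	/*
	 * Sketch only: taking sqe_flags as a parameter is an assumption;
	 * the checks themselves are the ones from the quoted hunk.
	 */
	static int io_check_restriction(struct io_ring_ctx *ctx,
					struct io_kiocb *req,
					unsigned int sqe_flags)
	{
		/* Opcode must have been registered as allowed. */
		if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
			return -EACCES;

		/* Every required flag must be set... */
		if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
		    ctx->restrictions.sqe_flags_required)
			return -EACCES;

		/* ...and no flag outside the allowed/required sets may be set. */
		if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
				  ctx->restrictions.sqe_flags_required))
			return -EACCES;

		return 0;
	}

The caller in io_init_req() would then follow Jens' outline, so unrestricted rings only pay for the unlikely() branch:

	if (unlikely(ctx->restricted)) {
		ret = io_check_restriction(ctx, req, sqe_flags);
		if (ret)
			return ret;
	}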
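
Likewise, a rough sketch of the ret = 0 idiom applied to the loop in io_register_restrictions() as posted; only the placement of the initialization changes, and the trailing "ret = 0;" after the loop goes away. The elided cases are unchanged from the quoted hunk.

	/* Start at zero; error paths inside the switch overwrite it. */
	ret = 0;
	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST) {
				ret = -EINVAL;
				goto out;
			}

			__set_bit(res[i].register_op,
				  ctx->restrictions.register_op);
			break;
		/* ... remaining cases unchanged ... */
		default:
			ret = -EINVAL;
			goto out;
		}
	}

	ctx->restricted = 1;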