On Tue, Jan 29, 2019 at 03:26:26PM +0200, Joel Nider wrote: > Add a new handler for new uverb reg_remote_mr. The purpose is to register > a memory region in a different address space (i.e. process) than the > caller. > > The main use case which motivated this change is post-copy container > migration. When a migration manager (i.e. CRIU) starts a migration, it > must have an open connection for handling any page faults that occur > in the container after restoration on the target machine. Even though > CRIU establishes and maintains the connection, ultimately the memory > is copied from the container being migrated (i.e. a remote address > space). This container must remain passive -- meaning it cannot have > any knowledge of the RDMA connection; therefore the migration manager > must have the ability to register a remote memory region. This remote > memory region will serve as the source for any memory pages that must > be copied (on-demand or otherwise) during the migration. > > Signed-off-by: Joel Nider <joeln@xxxxxxxxxx> > drivers/infiniband/core/uverbs_std_types_mr.c | 129 +++++++++++++++++++++++++- > include/rdma/ib_verbs.h | 8 ++ > include/uapi/rdma/ib_user_ioctl_cmds.h | 13 +++ > 3 files changed, 149 insertions(+), 1 deletion(-) > > diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c > index 4d4be0c..bf7b4b2 100644 > +++ b/drivers/infiniband/core/uverbs_std_types_mr.c > @@ -150,6 +150,99 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( > return ret; > } > > +static int UVERBS_HANDLER(UVERBS_METHOD_REG_REMOTE_MR)( > + struct uverbs_attr_bundle *attrs) > +{ I think this should just be REG_MR with an optional remote PID argument > + struct pid *owner_pid; > + struct ib_reg_remote_mr_attr attr = {}; > + struct ib_uobject *uobj = > + uverbs_attr_get_uobject(attrs, > + UVERBS_ATTR_REG_REMOTE_MR_HANDLE); > + struct ib_pd *pd = > + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_REMOTE_MR_PD_HANDLE); > + > + 
struct ib_mr *mr; > + int ret; > + > + ret = uverbs_copy_from(&attr.start, attrs, > + UVERBS_ATTR_REG_REMOTE_MR_START); > + if (ret) > + return ret; > + > + ret = uverbs_copy_from(&attr.length, attrs, > + UVERBS_ATTR_REG_REMOTE_MR_LENGTH); > + if (ret) > + return ret; > + > + ret = uverbs_copy_from(&attr.hca_va, attrs, > + UVERBS_ATTR_REG_REMOTE_MR_HCA_VA); > + if (ret) > + return ret; > + > + ret = uverbs_copy_from(&attr.owner, attrs, > + UVERBS_ATTR_REG_REMOTE_MR_OWNER); > + if (ret) > + return ret; Maybe these should use the const version; it is increasingly intended for small integers. Then we can do sensible things like use uintptr_t to store pointer values and size_t to store sizes - the code will automatically bounds check the user input if it is done like this. > + ret = uverbs_get_flags32(&attr.access_flags, attrs, > + UVERBS_ATTR_REG_REMOTE_MR_ACCESS_FLAGS, > + IB_ACCESS_SUPPORTED); > + if (ret) > + return ret; > + > + /* ensure the offsets are identical */ > + if ((attr.start & ~PAGE_MASK) != (attr.hca_va & ~PAGE_MASK)) > + return -EINVAL; > + > + ret = ib_check_mr_access(attr.access_flags); > + if (ret) > + return ret; > + > + if (attr.access_flags & IB_ACCESS_ON_DEMAND) { > + if (!(pd->device->attrs.device_cap_flags & > + IB_DEVICE_ON_DEMAND_PAGING)) { > + pr_debug("ODP support not available\n"); > + ret = -EINVAL; > + return ret; > + } > + } > + > + /* get the owner's pid struct before something happens to it */ > + owner_pid = find_get_pid(attr.owner); What about security? Should this match the access checks that ptrace does? 
> + mr = pd->device->ops.reg_user_mr(pd, attr.start, attr.length, > + attr.hca_va, attr.access_flags, owner_pid, NULL); > + if (IS_ERR(mr)) > + return PTR_ERR(mr); > + > + mr->device = pd->device; > + mr->pd = pd; > + mr->dm = NULL; > + mr->uobject = uobj; > + atomic_inc(&pd->usecnt); > + mr->res.type = RDMA_RESTRACK_MR; > + mr->res.task = get_pid_task(owner_pid, PIDTYPE_PID); > + rdma_restrack_kadd(&mr->res); > + > + uobj->object = mr; > + > + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_REMOTE_MR_RESP_LKEY, > + &mr->lkey, sizeof(mr->lkey)); > + if (ret) > + goto err_dereg; > + > + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_REMOTE_MR_RESP_RKEY, > + &mr->rkey, sizeof(mr->rkey)); > + if (ret) > + goto err_dereg; > + > + return 0; > + > +err_dereg: > + ib_dereg_mr(mr); > + > + return ret; > +} > + > DECLARE_UVERBS_NAMED_METHOD( > UVERBS_METHOD_ADVISE_MR, > UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, > @@ -203,12 +296,46 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( > UVERBS_ACCESS_DESTROY, > UA_MANDATORY)); > > +DECLARE_UVERBS_NAMED_METHOD( > + UVERBS_METHOD_REG_REMOTE_MR, > + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_REMOTE_MR_HANDLE, > + UVERBS_OBJECT_MR, > + UVERBS_ACCESS_NEW, > + UA_MANDATORY), > + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_REMOTE_MR_PD_HANDLE, > + UVERBS_OBJECT_PD, > + UVERBS_ACCESS_READ, > + UA_MANDATORY), > + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_START, > + UVERBS_ATTR_TYPE(u64), > + UA_MANDATORY), > + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_LENGTH, > + UVERBS_ATTR_TYPE(u64), > + UA_MANDATORY), > + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_HCA_VA, > + UVERBS_ATTR_TYPE(u64), > + UA_MANDATORY), > + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_REMOTE_MR_ACCESS_FLAGS, > + enum ib_access_flags), > + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_OWNER, > + UVERBS_ATTR_TYPE(u32), > + UA_MANDATORY), > + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_REMOTE_MR_RESP_LKEY, > + UVERBS_ATTR_TYPE(u32), > + UA_MANDATORY), > + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_REMOTE_MR_RESP_RKEY, > 
+ UVERBS_ATTR_TYPE(u32), > + UA_MANDATORY), > +); > + > DECLARE_UVERBS_NAMED_OBJECT( > UVERBS_OBJECT_MR, > UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), > &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), > &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), > - &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR)); > + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), > + &UVERBS_METHOD(UVERBS_METHOD_REG_REMOTE_MR), > +); I'm kind of surprised this compiles with the trailing comma. > const struct uapi_definition uverbs_def_obj_mr[] = { > UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index 3432404..dcf5edc 100644 > +++ b/include/rdma/ib_verbs.h > @@ -334,6 +334,14 @@ struct ib_dm_alloc_attr { > u32 flags; > }; > > +struct ib_reg_remote_mr_attr { > + u64 start; > + u64 length; > + u64 hca_va; > + u32 access_flags; > + u32 owner; > +}; Why is this struct needed? And why define it here, in this header? Jason