Re: [PATCH 5/5] RDMA/uverbs: add UVERBS_METHOD_REG_REMOTE_MR

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jan 29, 2019 at 03:26:26PM +0200, Joel Nider wrote:
> Add a new handler for new uverb reg_remote_mr. The purpose is to register
> a memory region in a different address space (i.e. process) than the
> caller.
> 
> The main use case which motivated this change is post-copy container
> migration. When a migration manager (i.e. CRIU) starts a migration, it
> must have an open connection for handling any page faults that occur
> in the container after restoration on the target machine. Even though
> CRIU establishes and maintains the connection, ultimately the memory
> is copied from the container being migrated (i.e. a remote address
> space). This container must remain passive -- meaning it cannot have
> any knowledge of the RDMA connection; therefore the migration manager
> must have the ability to register a remote memory region. This remote
> memory region will serve as the source for any memory pages that must
> be copied (on-demand or otherwise) during the migration.
> 
> Signed-off-by: Joel Nider <joeln@xxxxxxxxxx>
>  drivers/infiniband/core/uverbs_std_types_mr.c | 129 +++++++++++++++++++++++++-
>  include/rdma/ib_verbs.h                       |   8 ++
>  include/uapi/rdma/ib_user_ioctl_cmds.h        |  13 +++
>  3 files changed, 149 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
> index 4d4be0c..bf7b4b2 100644
> +++ b/drivers/infiniband/core/uverbs_std_types_mr.c
> @@ -150,6 +150,99 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
>  	return ret;
>  }
>  
> +static int UVERBS_HANDLER(UVERBS_METHOD_REG_REMOTE_MR)(
> +	struct uverbs_attr_bundle *attrs)
> +{

I think this should just be REG_MR with an optional remote PID
argument.

> +	struct pid *owner_pid;
> +	struct ib_reg_remote_mr_attr attr = {};
> +	struct ib_uobject *uobj =
> +		uverbs_attr_get_uobject(attrs,
> +					UVERBS_ATTR_REG_REMOTE_MR_HANDLE);
> +	struct ib_pd *pd =
> +		uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_REMOTE_MR_PD_HANDLE);
> +
> +	struct ib_mr *mr;
> +	int ret;
> +
> +	ret = uverbs_copy_from(&attr.start, attrs,
> +				UVERBS_ATTR_REG_REMOTE_MR_START);
> +	if (ret)
> +		return ret;
> +
> +	ret = uverbs_copy_from(&attr.length, attrs,
> +				UVERBS_ATTR_REG_REMOTE_MR_LENGTH);
> +	if (ret)
> +		return ret;
> +
> +	ret = uverbs_copy_from(&attr.hca_va, attrs,
> +				UVERBS_ATTR_REG_REMOTE_MR_HCA_VA);
> +	if (ret)
> +		return ret;
> +
> +	ret = uverbs_copy_from(&attr.owner, attrs,
> +				UVERBS_ATTR_REG_REMOTE_MR_OWNER);
> +	if (ret)
> +		return ret;

Maybe these should use the const version. It is increasingly intended
for small integers, and with it we can do sensible things like use
uintptr_t to store pointer values and size_t to store sizes - the code
will automatically bounds check the user input if it is done like this.

> +	ret = uverbs_get_flags32(&attr.access_flags, attrs,
> +				 UVERBS_ATTR_REG_REMOTE_MR_ACCESS_FLAGS,
> +				 IB_ACCESS_SUPPORTED);
> +	if (ret)
> +		return ret;
> +
> +	/* ensure the offsets are identical */
> +	if ((attr.start & ~PAGE_MASK) != (attr.hca_va & ~PAGE_MASK))
> +		return -EINVAL;
> +
> +	ret = ib_check_mr_access(attr.access_flags);
> +	if (ret)
> +		return ret;
> +
> +	if (attr.access_flags & IB_ACCESS_ON_DEMAND) {
> +		if (!(pd->device->attrs.device_cap_flags &
> +		      IB_DEVICE_ON_DEMAND_PAGING)) {
> +			pr_debug("ODP support not available\n");
> +			ret = -EINVAL;
> +			return ret;
> +		}
> +	}
> +
> +	/* get the owner's pid struct before something happens to it */
> +	owner_pid = find_get_pid(attr.owner);

What about security? Should this match what ptrace does?

> +	mr = pd->device->ops.reg_user_mr(pd, attr.start, attr.length,
> +		attr.hca_va, attr.access_flags, owner_pid, NULL);
> +	if (IS_ERR(mr))
> +		return PTR_ERR(mr);
> +
> +	mr->device  = pd->device;
> +	mr->pd      = pd;
> +	mr->dm	    = NULL;
> +	mr->uobject = uobj;
> +	atomic_inc(&pd->usecnt);
> +	mr->res.type = RDMA_RESTRACK_MR;
> +	mr->res.task = get_pid_task(owner_pid, PIDTYPE_PID);
> +	rdma_restrack_kadd(&mr->res);
> +
> +	uobj->object = mr;
> +
> +	ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_REMOTE_MR_RESP_LKEY,
> +		   &mr->lkey, sizeof(mr->lkey));
> +	if (ret)
> +		goto err_dereg;
> +
> +	ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_REMOTE_MR_RESP_RKEY,
> +			&mr->rkey, sizeof(mr->rkey));
> +	if (ret)
> +		goto err_dereg;
> +
> +	return 0;
> +
> +err_dereg:
> +	ib_dereg_mr(mr);
> +
> +	return ret;
> +}
> +
>  DECLARE_UVERBS_NAMED_METHOD(
>  	UVERBS_METHOD_ADVISE_MR,
>  	UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
> @@ -203,12 +296,46 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
>  			UVERBS_ACCESS_DESTROY,
>  			UA_MANDATORY));
>  
> +DECLARE_UVERBS_NAMED_METHOD(
> +	UVERBS_METHOD_REG_REMOTE_MR,
> +	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_REMOTE_MR_HANDLE,
> +			UVERBS_OBJECT_MR,
> +			UVERBS_ACCESS_NEW,
> +			UA_MANDATORY),
> +	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_REMOTE_MR_PD_HANDLE,
> +			UVERBS_OBJECT_PD,
> +			UVERBS_ACCESS_READ,
> +			UA_MANDATORY),
> +	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_START,
> +			   UVERBS_ATTR_TYPE(u64),
> +			   UA_MANDATORY),
> +	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_LENGTH,
> +			   UVERBS_ATTR_TYPE(u64),
> +			   UA_MANDATORY),
> +	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_HCA_VA,
> +			   UVERBS_ATTR_TYPE(u64),
> +			   UA_MANDATORY),
> +	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_REMOTE_MR_ACCESS_FLAGS,
> +			     enum ib_access_flags),
> +	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_OWNER,
> +			   UVERBS_ATTR_TYPE(u32),
> +			   UA_MANDATORY),
> +	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_REMOTE_MR_RESP_LKEY,
> +			    UVERBS_ATTR_TYPE(u32),
> +			    UA_MANDATORY),
> +	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_REMOTE_MR_RESP_RKEY,
> +			    UVERBS_ATTR_TYPE(u32),
> +			    UA_MANDATORY),
> +);
> +
>  DECLARE_UVERBS_NAMED_OBJECT(
>  	UVERBS_OBJECT_MR,
>  	UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
>  	&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
>  	&UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
> -	&UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
> +	&UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
> +	&UVERBS_METHOD(UVERBS_METHOD_REG_REMOTE_MR),
> +);

I'm kind of surprised this compiles with the trailing comma?

>  const struct uapi_definition uverbs_def_obj_mr[] = {
>  	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 3432404..dcf5edc 100644
> +++ b/include/rdma/ib_verbs.h
> @@ -334,6 +334,14 @@ struct ib_dm_alloc_attr {
>  	u32	flags;
>  };
>  
> +struct ib_reg_remote_mr_attr {
> +	u64      start;
> +	u64      length;
> +	u64      hca_va;
> +	u32      access_flags;
> +	u32      owner;
> +};

Why? Why here?

Jason



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux