Re: [PATCH v3 07/17] IB/core: Add flags for on demand paging support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Le jeudi 11 décembre 2014 à 17:04 +0200, Haggai Eran a écrit :
> From: Sagi Grimberg <sagig@xxxxxxxxxxxx>
> 
> * Add a configuration option for enable on-demand paging support in the
>   infiniband subsystem (CONFIG_INFINIBAND_ON_DEMAND_PAGING). In a later patch,
>   this configuration option will select the MMU_NOTIFIER configuration option
>   to enable mmu notifiers.
> * Add a flag for on demand paging (ODP) support in the IB device capabilities.
> * Add a flag to request ODP MR in the access flags to reg_mr.
> * Fail registrations done with the ODP flag when the low-level driver doesn't
>   support this.
> * Change the conditions in which an MR will be writable to explicitly
>   specify the access flags. This is to avoid making an MR writable just
>   because it is an ODP MR.
> * Add a ODP capabilities to the extended query device verb.
> 
> Signed-off-by: Sagi Grimberg <sagig@xxxxxxxxxxxx>
> Signed-off-by: Shachar Raindel <raindel@xxxxxxxxxxxx>
> Signed-off-by: Haggai Eran <haggaie@xxxxxxxxxxxx>
> ---
>  drivers/infiniband/Kconfig           | 10 ++++++++++
>  drivers/infiniband/core/umem.c       |  8 +++++---
>  drivers/infiniband/core/uverbs_cmd.c | 25 +++++++++++++++++++++++++
>  include/rdma/ib_verbs.h              | 28 ++++++++++++++++++++++++++--
>  include/uapi/rdma/ib_user_verbs.h    | 15 +++++++++++++++
>  5 files changed, 81 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
> index 77089399359b..089a2c2af329 100644
> --- a/drivers/infiniband/Kconfig
> +++ b/drivers/infiniband/Kconfig
> @@ -38,6 +38,16 @@ config INFINIBAND_USER_MEM
>  	depends on INFINIBAND_USER_ACCESS != n
>  	default y
>  
> +config INFINIBAND_ON_DEMAND_PAGING
> +	bool "InfiniBand on-demand paging support"
> +	depends on INFINIBAND_USER_MEM
> +	default y
> +	---help---
> +	  On demand paging support for the InfiniBand subsystem.
> +	  Together with driver support this allows registration of
> +	  memory regions without pinning their pages, fetching the
> +	  pages on demand instead.
> +
>  config INFINIBAND_ADDR_TRANS
>  	bool
>  	depends on INFINIBAND
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 6f152628e0d2..c328e4693d14 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -107,13 +107,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
>  	umem->page_size = PAGE_SIZE;
>  	umem->pid       = get_task_pid(current, PIDTYPE_PID);
>  	/*
> -	 * We ask for writable memory if any access flags other than
> -	 * "remote read" are set.  "Local write" and "remote write"
> +	 * We ask for writable memory if any of the following
> +	 * access flags are set.  "Local write" and "remote write"
>  	 * obviously require write access.  "Remote atomic" can do
>  	 * things like fetch and add, which will modify memory, and
>  	 * "MW bind" can change permissions by binding a window.
>  	 */
> -	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
> +	umem->writable  = !!(access &
> +		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
> +		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
>  
>  	/* We assume the memory is from hugetlb until proved otherwise */
>  	umem->hugetlb   = 1;
> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index c7a43624c96b..f9326ccda4b5 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -953,6 +953,18 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
>  		goto err_free;
>  	}
>  
> +	if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
> +		struct ib_device_attr attr;
> +
> +		ret = ib_query_device(pd->device, &attr);
> +		if (ret || !(attr.device_cap_flags &
> +				IB_DEVICE_ON_DEMAND_PAGING)) {
> +			pr_debug("ODP support not available\n");
> +			ret = -EINVAL;
> +			goto err_put;
> +		}
> +	}
> +
>  	mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
>  				     cmd.access_flags, &udata);
>  	if (IS_ERR(mr)) {
> @@ -3289,6 +3301,19 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
>  	copy_query_dev_fields(file, &resp.base, &attr);
>  	resp.comp_mask = 0;
>  
> +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
> +	if (cmd.comp_mask & IB_USER_VERBS_EX_QUERY_DEVICE_ODP) {
> +		resp.odp_caps.general_caps = attr.odp_caps.general_caps;
> +		resp.odp_caps.per_transport_caps.rc_odp_caps =
> +			attr.odp_caps.per_transport_caps.rc_odp_caps;
> +		resp.odp_caps.per_transport_caps.uc_odp_caps =
> +			attr.odp_caps.per_transport_caps.uc_odp_caps;
> +		resp.odp_caps.per_transport_caps.ud_odp_caps =
> +			attr.odp_caps.per_transport_caps.ud_odp_caps;
> +		resp.comp_mask |= IB_USER_VERBS_EX_QUERY_DEVICE_ODP;
> +	}

You need to clear the tail of the response; otherwise, the kernel will
leak stack content to userspace:

+ #else /* !CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+	resp.odp_caps.general_caps = 0;
+	resp.odp_caps.per_transport_caps.rc_odp_caps = 0;
+	resp.odp_caps.per_transport_caps.uc_odp_caps = 0;
+	resp.odp_caps.per_transport_caps.ud_odp_caps = 0;
 	
> +#endif
> +

+	resp.odp_caps.reserved = 0;

>  	err = ib_copy_to_udata(ucore, &resp, sizeof(resp));
>  	if (err)
>  		return err;
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 97a999f9e4d8..a41bc5a39ebf 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -123,7 +123,8 @@ enum ib_device_cap_flags {
>  	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
>  	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24),
>  	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
> -	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30)
> +	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30),
> +	IB_DEVICE_ON_DEMAND_PAGING	= (1<<31),
>  };
>  
>  enum ib_signature_prot_cap {
> @@ -143,6 +144,27 @@ enum ib_atomic_cap {
>  	IB_ATOMIC_GLOB
>  };
>  
> +enum ib_odp_general_cap_bits {
> +	IB_ODP_SUPPORT = 1 << 0,
> +};
> +
> +enum ib_odp_transport_cap_bits {
> +	IB_ODP_SUPPORT_SEND	= 1 << 0,
> +	IB_ODP_SUPPORT_RECV	= 1 << 1,
> +	IB_ODP_SUPPORT_WRITE	= 1 << 2,
> +	IB_ODP_SUPPORT_READ	= 1 << 3,
> +	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
> +};
> +
> +struct ib_odp_caps {
> +	uint64_t general_caps;
> +	struct {
> +		uint32_t  rc_odp_caps;
> +		uint32_t  uc_odp_caps;
> +		uint32_t  ud_odp_caps;
> +	} per_transport_caps;
> +};
> +
>  struct ib_device_attr {
>  	u64			fw_ver;
>  	__be64			sys_image_guid;
> @@ -186,6 +208,7 @@ struct ib_device_attr {
>  	u8			local_ca_ack_delay;
>  	int			sig_prot_cap;
>  	int			sig_guard_cap;
> +	struct ib_odp_caps	odp_caps;
>  };
>  
>  enum ib_mtu {
> @@ -1073,7 +1096,8 @@ enum ib_access_flags {
>  	IB_ACCESS_REMOTE_READ	= (1<<2),
>  	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
>  	IB_ACCESS_MW_BIND	= (1<<4),
> -	IB_ZERO_BASED		= (1<<5)
> +	IB_ZERO_BASED		= (1<<5),
> +	IB_ACCESS_ON_DEMAND     = (1<<6),
>  };
>  
>  struct ib_phys_buf {
> diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
> index e8a96071e352..4275b961bf60 100644
> --- a/include/uapi/rdma/ib_user_verbs.h
> +++ b/include/uapi/rdma/ib_user_verbs.h
> @@ -202,15 +202,30 @@ struct ib_uverbs_query_device_resp {
>  	__u8  reserved[4];
>  };
>  
> +enum {
> +	IB_USER_VERBS_EX_QUERY_DEVICE_ODP =		1ULL << 0,
> +};
> +
>  struct ib_uverbs_ex_query_device {
>  	__u32 comp_mask;
>  	__u32 reserved;
>  };
>  
> +struct ib_uverbs_odp_caps {
> +	__u64 general_caps;
> +	struct {
> +		__u32 rc_odp_caps;
> +		__u32 uc_odp_caps;
> +		__u32 ud_odp_caps;
> +	} per_transport_caps;
> +	__u32 reserved;
> +};
> +
>  struct ib_uverbs_ex_query_device_resp {
>  	struct ib_uverbs_query_device_resp base;
>  	__u32 comp_mask;
>  	__u32 reserved;
> +	struct ib_uverbs_odp_caps odp_caps;
>  };

Hopefully, no kernel was released with ib_uverbs_ex_query_device_resp
without odp_caps (e.g. in between '[PATCH v3 06/17] IB/core: Add support
for extended query device caps' and this one); otherwise,
ib_uverbs_ex_query_device() would have to be modified
to handle a shorter ib_uverbs_ex_query_device_resp to accommodate the ABI
variations.

>  
>  struct ib_uverbs_query_port {

Regards.

-- 
Yann Droneaud


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux