On 5/18/23 03:21, Daisuke Matsuda wrote: > Enable 'fetch and add' and 'compare and swap' operations to manipulate > data in an ODP-enabled MR. This is comprised of the following steps: > 1. Check the driver page table (umem_odp->dma_list) to see if the target > page is both readable and writable. > 2. If not, then trigger page fault to map the page. > 3. Update the entry in the MR xarray. > 4. Execute the operation. > > umem_mutex is used to ensure that dma_list (an array of addresses of an MR) > is not changed while it is being checked and that the target page is not > invalidated before data access completes. > > Signed-off-by: Daisuke Matsuda <matsuda-daisuke@xxxxxxxxxxx> > --- > drivers/infiniband/sw/rxe/rxe.c | 1 + > drivers/infiniband/sw/rxe/rxe_loc.h | 9 +++++++++ > drivers/infiniband/sw/rxe/rxe_odp.c | 26 ++++++++++++++++++++++++++ > drivers/infiniband/sw/rxe/rxe_resp.c | 5 ++++- > 4 files changed, 40 insertions(+), 1 deletion(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c > index 207a022156f0..abd3267c2873 100644 > --- a/drivers/infiniband/sw/rxe/rxe.c > +++ b/drivers/infiniband/sw/rxe/rxe.c > @@ -88,6 +88,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe) > rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV; > rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE; > rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ; > + rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; > rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; > } > } > diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h > index 4b95c8c46bdc..b9d2985774ee 100644 > --- a/drivers/infiniband/sw/rxe/rxe_loc.h > +++ b/drivers/infiniband/sw/rxe/rxe_loc.h > @@ -208,6 +208,9 @@ int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, > u64 iova, int access_flags, struct rxe_mr *mr); > int 
rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, > enum rxe_mr_copy_dir dir); > +int rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, > + u64 compare, u64 swap_add, u64 *orig_val); > + > #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ > static inline int > rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, > @@ -221,6 +224,12 @@ rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, > { > return -EOPNOTSUPP; > } > +static inline int > +rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, > + u64 compare, u64 swap_add, u64 *orig_val) > +{ > + return RESPST_ERR_UNSUPPORTED_OPCODE; > +} > > #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ > > diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c > index cbe5d0c3fcc4..194b1fab98b7 100644 > --- a/drivers/infiniband/sw/rxe/rxe_odp.c > +++ b/drivers/infiniband/sw/rxe/rxe_odp.c > @@ -283,3 +283,29 @@ int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, > > return err; > } > + > +int rxe_odp_mr_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, > + u64 compare, u64 swap_add, u64 *orig_val) > +{ > + int err; > + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem); > + > + /* If pagefault is not required, umem mutex will be held until the > + * atomic operation completes. Otherwise, it is released and locked > + * again in rxe_odp_map_range() to let invalidation handler do its > + * work meanwhile. > + */ > + mutex_lock(&umem_odp->umem_mutex); > + > + /* Atomic operations manipulate a single char. 
*/ > + err = rxe_odp_map_range(mr, iova, sizeof(char), 0); > + if (err) { > + mutex_unlock(&umem_odp->umem_mutex); > + return err; > + } > + > + err = rxe_mr_do_atomic_op(mr, iova, opcode, compare, > + swap_add, orig_val); > + > + mutex_unlock(&umem_odp->umem_mutex); > + > + return err; > +} > diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c > index 90c31c4f2944..0a918145dc07 100644 > --- a/drivers/infiniband/sw/rxe/rxe_resp.c > +++ b/drivers/infiniband/sw/rxe/rxe_resp.c > @@ -684,7 +684,10 @@ static enum resp_states atomic_reply(struct rxe_qp *qp, > u64 iova = qp->resp.va + qp->resp.offset; > > if (mr->odp_enabled) > - err = RESPST_ERR_UNSUPPORTED_OPCODE; > + err = rxe_odp_mr_atomic_op(mr, iova, pkt->opcode, > + atmeth_comp(pkt), > + atmeth_swap_add(pkt), > + &res->atomic.orig_val); > else > err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode, > atmeth_comp(pkt), Reviewed-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>