On Tue, Jan 17, 2023 at 11:25:41AM -0600, Bob Pearson wrote: > @@ -574,27 +559,49 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) > return -EINVAL; > } > > - va = iova_to_vaddr(mr, iova, sizeof(value)); > - if (unlikely(!va)) { > - rxe_dbg_mr(mr, "iova out of range"); > - return -ERANGE; > + if (mr->ibmr.type == IB_MR_TYPE_DMA) { > + page_offset = iova & (PAGE_SIZE - 1); > + page = virt_to_page(iova & PAGE_MASK); > + } else { > + unsigned long index; > + int err; > + > + /* See IBA oA19-28 */ > + err = mr_check_range(mr, iova, sizeof(value)); > + if (unlikely(err)) { > + rxe_dbg_mr(mr, "iova out of range"); > + return -ERANGE; > + } > + page_offset = rxe_mr_iova_to_page_offset(mr, iova); > + index = rxe_mr_iova_to_index(mr, iova); > + page = xa_load(&mr->page_list, index); > + if (!page) > + return -EFAULT; > } > > /* See IBA A19.4.2 */ > - if (unlikely((uintptr_t)va & 0x7 || iova & 0x7)) { > + if (unlikely(page_offset & 0x7)) { > rxe_dbg_mr(mr, "misaligned address"); > return -RXE_ERR_NOT_ALIGNED; > } > > + va = kmap_local_page(page); > + > /* Do atomic write after all prior operations have completed */ > - smp_store_release(va, value); > + /* TODO: This is what was chosen by the implementer but I am > + * concerned it isn't what they want. This only guarantees that > + * the write will complete before any subsequent reads but the > + * comment says all prior operations have completed. That would > + * require a full mb() or matching acquire. > + * Normal usage has a matching load_acquire and store_release. > + */ > + smp_store_release(&va[page_offset >> 3], value); The 'atomicness' is that the NIC side does a 'release' and the CPU side will do an 'acquire' when it reads the same memory. The 'acquire' from the CPU side will ensure that any prior writes or atomics done by the NIC are visible to the CPU - because that is what acquire/release means. 
E.g., if the NIC does an RDMA write to X and then an atomic update (with release), then an acquire that observes the atomic update will also observe the write to X. acquire/release and rmb/wmb are two different models of the same concept. acquire/release is more data-centric and tends to speak more about how data observability is ordered, while the barriers tend to talk more about how the CPU orders operations. Jason