Since commit 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers"), VDUSE device require support for DMA's .sync_single_for_cpu() operation as the memory is non-coherent between the device and CPU because of the use of a bounce buffer. This patch implements both .sync_single_for_cpu() and .sync_single_for_device() callbacks, and also skip bounce buffer copies during DMA map and unmap operations if the DMA_ATTR_SKIP_CPU_SYNC attribute is set to avoid extra copies of the same buffer. Signed-off-by: Maxime Coquelin <maxime.coquelin@xxxxxxxxxx> --- drivers/vdpa/vdpa_user/iova_domain.c | 27 ++++++++++++++++++++++++--- drivers/vdpa/vdpa_user/iova_domain.h | 8 ++++++++ drivers/vdpa/vdpa_user/vduse_dev.c | 22 ++++++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 5e4a77b9bae6..791d38d6284c 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -373,6 +373,26 @@ static void vduse_domain_free_iova(struct iova_domain *iovad, free_iova_fast(iovad, iova >> shift, iova_len); } +void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + read_lock(&domain->bounce_lock); + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE); + read_unlock(&domain->bounce_lock); +} + +void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + read_lock(&domain->bounce_lock); + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); + read_unlock(&domain->bounce_lock); +} + dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, @@ -393,7 +413,8 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) goto err_unlock; - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE); read_unlock(&domain->bounce_lock); @@ -411,9 +432,9 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, enum dma_data_direction dir, unsigned long attrs) { struct iova_domain *iovad = &domain->stream_iovad; - read_lock(&domain->bounce_lock); - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size); diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h index 173e979b84a9..f92f22a7267d 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.h +++ b/drivers/vdpa/vdpa_user/iova_domain.h @@ -44,6 +44,14 @@ int vduse_domain_set_map(struct vduse_iova_domain *domain, void vduse_domain_clear_map(struct vduse_iova_domain *domain, struct vhost_iotlb *iotlb); +void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir); + +void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir); + dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 1d24da79c399..75354ce186a1 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -798,6 +798,26 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = { .free = vduse_vdpa_free, }; +static void vduse_dev_sync_single_for_device(struct device *dev, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + vduse_domain_sync_single_for_device(domain, dma_addr, size, dir); +} + +static void vduse_dev_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir); +} + static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, @@ -858,6 +878,8 @@ static size_t vduse_dev_max_mapping_size(struct device *dev) } static const struct dma_map_ops vduse_dev_dma_ops = { + .sync_single_for_device = vduse_dev_sync_single_for_device, + .sync_single_for_cpu = vduse_dev_sync_single_for_cpu, .map_page = vduse_dev_map_page, .unmap_page = vduse_dev_unmap_page, .alloc = vduse_dev_alloc_coherent, -- 2.43.0