On 06/22/23 10:25, David Hildenbrand wrote:
> On 22.06.23 09:27, Vivek Kasireddy wrote:
> >
> There are *probably* more issues on the QEMU side when udmabuf is paired
> with things like MADV_DONTNEED/FALLOC_FL_PUNCH_HOLE used for virtio-balloon,
> virtio-mem, postcopy live migration, ... for example, in the vfio/vdpa case
> we make sure that we disallow most of these, because otherwise there can be
> an accidental "disconnect" between the pages mapped into the VM (guest view)
> and the pages mapped into the IOMMU (device view), for example, after a
> reboot.
>

Yes, this "disconnect" is still possible. Attached is a test program I hacked
up based on the udmabuf selftest. You can see different content in the memfd
pages and the udmabuf pages.

FYI - I can verify that this new udmabuf code is not accessing the struct pages
of hugetlb tail pages: with the old udmabuf code, this test program BUG'ed when
hugetlb vmemmap optimization was enabled.
--
Mike Kravetz
// SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #define __EXPORTED_HEADERS__ #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <errno.h> #include <fcntl.h> #include <malloc.h> #include <sys/mman.h> #include <sys/ioctl.h> #include <sys/syscall.h> #include <linux/memfd.h> #include <linux/udmabuf.h> #define TEST_PREFIX "drivers/dma-buf/udmabuf" #define NUM_PAGES 2 static int my_getpagesize(void) { /* huge page size */ return getpagesize() * 512; } #if 0 static int memfd_create(const char *name, unsigned int flags) { return syscall(__NR_memfd_create, name, flags); } #endif int main(int argc, char *argv[]) { struct udmabuf_create create; int devfd, memfd, buf, ret; off_t size; void *mem; int i; char foo; int mem_fd, udma_fd; void *addr1, *addr2; devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { printf("%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", TEST_PREFIX); exit(77); } mem_fd = memfd_create("udmabuf-test", MFD_HUGETLB | MFD_ALLOW_SEALING); if (mem_fd < 0) { printf("%s: [skip,no-memfd]\n", TEST_PREFIX); exit(77); } ret = fcntl(mem_fd, F_ADD_SEALS, F_SEAL_SHRINK); if (ret < 0) { printf("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); exit(77); } size = my_getpagesize() * NUM_PAGES; ret = ftruncate(mem_fd, size); if (ret == -1) { printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); exit(1); } /* touch all pages */ addr1 = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, mem_fd, 0); if (addr1 == MAP_FAILED) { printf("%s: [FAIL,mmap]\n", TEST_PREFIX); exit(1); } for (i = 0; i < size / getpagesize(); i++) { *((char *)addr1 + (i * getpagesize())) = 'a'; } memset(&create, 0, sizeof(create)); #if 0 /* should fail (offset not page aligned) */ create.memfd = mem_fd; create.offset = getpagesize()/2; create.size = getpagesize(); buf = ioctl(devfd, UDMABUF_CREATE, &create); if (buf >= 0) { printf("%s: [FAIL,test-1]\n", TEST_PREFIX); exit(1); } /* should fail (size not multiple of page) */ create.memfd = mem_fd; 
create.offset = 0; create.size = getpagesize()/2; buf = ioctl(devfd, UDMABUF_CREATE, &create); if (buf >= 0) { printf("%s: [FAIL,test-2]\n", TEST_PREFIX); exit(1); } /* should fail (not memfd) */ create.memfd = 0; /* stdin */ create.offset = 0; create.size = size; buf = ioctl(devfd, UDMABUF_CREATE, &create); if (buf >= 0) { printf("%s: [FAIL,test-3]\n", TEST_PREFIX); exit(1); } #endif /* should work */ create.memfd = mem_fd; create.offset = getpagesize() * 256; create.size = getpagesize() * 4; udma_fd = ioctl(devfd, UDMABUF_CREATE, &create); if (udma_fd < 0) { perror("UDMABUF_CREATE"); printf("%s: [FAIL,test-4]\n", TEST_PREFIX); exit(1); } printf("before hole punch\n"); (void)getchar(); ret = fallocate(mem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, my_getpagesize()); if (ret) perror("fallocate punch hole"); printf("after hole punch\n"); (void)getchar(); for (i = 0; i < size / getpagesize(); i++) { *((char *)addr1 + (i * getpagesize())) = 'b'; } printf("after touch again\n"); (void)getchar(); /* touch all pages */ addr2 = mmap(NULL, getpagesize() * 4, PROT_READ|PROT_WRITE, MAP_SHARED, udma_fd, 0); if (addr2 == MAP_FAILED) { perror("mmap"); printf("%s: udma_fd mmap fail\n", TEST_PREFIX); exit(1); } for (i = 0; i < 4; i++) { foo = *((char *)addr2 + (i * getpagesize())); printf("udmabuf %c\n", foo); } for (i = 256; i < 260; i++) { foo = *((char *)addr1 + (i * getpagesize())); printf("memfd %c\n", foo); } fprintf(stderr, "%s: ok\n", TEST_PREFIX); close(udma_fd); close(mem_fd); close(devfd); return 0; }