Quoting Tvrtko Ursulin (2018-06-29 10:31:23) > > On 29/06/2018 08:44, Chris Wilson wrote: > > Setup a userptr object that only has a read-only mapping back to a file > > store (memfd). Then attempt to write into that mapping using the GPU and > > assert that those writes do not land (while also writing via a writable > > userptr mapping into the same memfd to verify that the GPU is working!) > > > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > > --- > > Minor commentary additions > > --- > > configure.ac | 1 + > > lib/ioctl_wrappers.c | 4 +- > > lib/ioctl_wrappers.h | 4 +- > > lib/meson.build | 1 + > > meson.build | 1 + > > tests/Makefile.am | 4 +- > > tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++- > > 7 files changed, 377 insertions(+), 10 deletions(-) > > > > diff --git a/configure.ac b/configure.ac > > index 1ee4e90e9..195963d4f 100644 > > --- a/configure.ac > > +++ b/configure.ac > > @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) > > PKG_CHECK_MODULES(KMOD, [libkmod]) > > PKG_CHECK_MODULES(PROCPS, [libprocps]) > > PKG_CHECK_MODULES(LIBUNWIND, [libunwind]) > > +PKG_CHECK_MODULES(SSL, [openssl]) > > PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no]) > > > > if test x$have_valgrind = xyes; then > > diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c > > index 79db44a8c..d5d2a4e4c 100644 > > --- a/lib/ioctl_wrappers.c > > +++ b/lib/ioctl_wrappers.c > > @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state) > > return madv.retained; > > } > > > > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > > { > > struct drm_i915_gem_userptr userptr; > > > > @@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui > > * > > * Returns userptr handle for the GEM object. > > */ > > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > > { > > igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0); > > } > > diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h > > index b966f72c9..8e2cd380b 100644 > > --- a/lib/ioctl_wrappers.h > > +++ b/lib/ioctl_wrappers.h > > @@ -133,8 +133,8 @@ struct local_i915_gem_userptr { > > #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31) > > uint32_t handle; > > }; > > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > > > void gem_sw_finish(int fd, uint32_t handle); > > > > diff --git a/lib/meson.build b/lib/meson.build > > index 1a355414e..939167f91 100644 > > --- a/lib/meson.build > > +++ b/lib/meson.build > > @@ -62,6 +62,7 @@ lib_deps = [ > > pthreads, > > math, > > realtime, > > + ssl, > > ] > > > > if libdrm_intel.found() > > diff --git a/meson.build b/meson.build > > index 4d15d6238..638c01066 100644 > > --- a/meson.build > > +++ b/meson.build > > @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10') > > libkmod = dependency('libkmod') > > libprocps = dependency('libprocps', required : true) > > libunwind = dependency('libunwind', required : true) > > +ssl = dependency('openssl', required : true) > > > > valgrind = null_dep > > valgrindinfo = 'No' > > diff --git a/tests/Makefile.am b/tests/Makefile.am > > index f41ad5096..ba307b220 100644 > > --- a/tests/Makefile.am > > +++ b/tests/Makefile.am > > @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > gem_tiled_swapping_LDADD = $(LDADD) -lpthread > > prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > prime_self_import_LDADD = $(LDADD) -lpthread > > -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > -gem_userptr_blits_LDADD = $(LDADD) -lpthread > > +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS) > > +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread > > perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > > > > gem_eio_LDADD = $(LDADD) -lrt > > diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c > > index 7e3b6ef38..0fb7eba04 100644 > > --- a/tests/gem_userptr_blits.c > > +++ b/tests/gem_userptr_blits.c > > @@ -43,13 +43,17 @@ > > #include <fcntl.h> > > #include <inttypes.h> > > #include <errno.h> > > +#include <setjmp.h> > > #include <sys/stat.h> > > #include <sys/time.h> > > #include <sys/mman.h> > > +#include <openssl/sha.h> > > #include <signal.h> > > #include <pthread.h> > > #include <time.h> > > > > +#include <linux/memfd.h> > > + > > #include "drm.h" > > #include "i915_drm.h" > > > > @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo) > > return ret; > > } > > > > +static void store_dword(int fd, uint32_t target, > > + uint32_t offset, uint32_t value) > > +{ > > + const int gen = intel_gen(intel_get_drm_devid(fd)); > > + struct drm_i915_gem_exec_object2 obj[2]; > > + struct drm_i915_gem_relocation_entry reloc; > > + struct drm_i915_gem_execbuffer2 execbuf; > > + uint32_t batch[16]; > > + int i; > > + > > + memset(&execbuf, 0, sizeof(execbuf)); > > + execbuf.buffers_ptr = to_user_pointer(obj); > > + execbuf.buffer_count = ARRAY_SIZE(obj); > > + execbuf.flags = 0; > > + if (gen < 6) > > + execbuf.flags |= I915_EXEC_SECURE; > > + > > + memset(obj, 0, sizeof(obj)); > > + obj[0].handle = target; > > + obj[1].handle = gem_create(fd, 4096); > > + > > + memset(&reloc, 0, sizeof(reloc)); > > + reloc.target_handle = obj[0].handle; > > + reloc.presumed_offset = 0; > > + reloc.offset = sizeof(uint32_t); > > + reloc.delta = offset; > > + reloc.read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc.write_domain = I915_GEM_DOMAIN_RENDER; > > + obj[1].relocs_ptr = to_user_pointer(&reloc); > > + obj[1].relocation_count = 1; > > + > > + i = 0; > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = 0; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc.offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > + batch[++i] = value; > > I still think to avoid too much code duplication: > > batch = __emit_store_dword(batch, gen, offset, value, &reloc.offset) > > Then from there see if something more could be extracted form the three > call sites. > > > + batch[++i] = MI_BATCH_BUFFER_END; > > + gem_write(fd, obj[1].handle, 0, batch, sizeof(batch)); > > + gem_execbuf(fd, &execbuf); > > + gem_close(fd, obj[1].handle); > > +} > > + > > static uint32_t > > create_userptr(int fd, uint32_t val, uint32_t *ptr) > > { > > @@ -941,6 +996,310 @@ static int test_dmabuf(void) > > return 0; > > } > > > > +static void test_readonly(int i915) > > +{ > > + unsigned char orig[SHA_DIGEST_LENGTH]; > > + uint64_t aperture_size; > > + uint32_t whandle, rhandle; > > + size_t sz, total; > > + void *pages, *space; > > + int memfd; > > + > > + /* > > + * A small batch of pages; small enough to cheaply check for stray > > + * writes but large enough that we don't create too many VMA pointing > > + * back to this set from the large arena. The limit on total number > > + * of VMA for a process is 65,536 (at least on this kernel). > > + * > > + * We then write from the GPU through the large arena into the smaller > > + * backing storage, which we can cheaply check to see if those writes > > + * have landed (using a SHA1sum). Repeating the same random GPU writes > > + * though a read-only handle to confirm that this time the writes are > > through > > > + * discarded and the backing store unchanged. > > + */ > > + sz = 16 << 12; > > Here you aim not to exceed the above mentioned per-process VMA limit but > please just express that in the code. Maybe re-order the code a bit: > > total = 2Gib > sz = 2Gib / max_vmas_per_process * 2 > aperture_size = ... > total = round_down > > Then proceed with allocation etc. > > > + memfd = memfd_create("pages", 0); > > + igt_require(memfd != -1); > > + igt_require(ftruncate(memfd, sz) == 0); > > + > > + pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0); > > + igt_assert(pages != MAP_FAILED); > > + > > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0); > > + gem_close(i915, rhandle); > > + > > + gem_userptr(i915, pages, sz, false, userptr_flags, &whandle); > > + > > /* > From your reply: > """ > the largest offset we can use is 4G, and we can't use the full range > as we need some extra room for batches, and we can't use the full VMA > limit without serious slow down and risk of exhaustion. > Then sticking to a pot. > """ > */ > > > + total = 2048ull << 20; > > + aperture_size = gem_aperture_size(i915) / 2; > > + if (aperture_size < total) > > + total = aperture_size; > > + total = total / sz * sz; > > + igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n", > > + total, total >> 12, sz >> 12); > > + > > + /* Create an arena all pointing to the same set of pages */ > > + space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0); > > Why MAP_SHARED? fork. > > + igt_require(space != MAP_FAILED); > > + for (size_t offset = 0; offset < total; offset += sz) { > > + igt_assert(mmap(space + offset, sz, > > + PROT_WRITE, MAP_SHARED | MAP_FIXED, > > + memfd, 0) != MAP_FAILED); > > + *(uint32_t *)(space + offset) = offset; > > AFAIU: > > First write instantiates the backing store, well, one page of it I > guess. Depending how memfd works I guess. But mlock later will do all of it. > > > + } > > + igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz)); > > ... and this checks that the arena is made up from repeating chunks. > (Checking that the signature written into the last chunk is mirrored in > the first one.) > > > + igt_assert(mlock(space, total) == 0); > > So this allocates all 64KiB definitely. > > > + close(memfd); > > + > > + /* Check we can create a normal userptr bo wrapping the wrapper */ > > + gem_userptr(i915, space, total, false, userptr_flags, &rhandle); > > This is not read-only so rhandle is a bit misleading. Why do you btw > create the whandle so early on and not just here? Hmm... whandle is > chunk size, rhandle is arena size.. so the two loops below are different > in that respect. Why is that? Because the arena will be readonly, the backing store is writeable. > > + gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0); > > + for (size_t offset = 0; offset < total; offset += sz) > > + store_dword(i915, rhandle, offset + 4, offset / sz); > > + gem_sync(i915, rhandle); > > I did not get your last reply here - once store dwords have completed > and you proceed to check the memory via CPU PTEs - do you need to move > the userptr bo back to the CPU domain so any flushes would happen? rhandle, it's a new userptr that we want to verify we can populate. > > + igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz)); > > + igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1)); > > I really think comment explaining the layout and which side writes at > which offset would be beneficial. It's literally explained in the code in this block and never used again. > > + gem_close(i915, rhandle); > > + > > + /* Now enforce read-only henceforth */ > > + igt_assert(mprotect(space, total, PROT_READ) == 0); > > + > > + SHA1(pages, sz, orig); > > + igt_fork(child, 1) { > > + const int gen = intel_gen(intel_get_drm_devid(i915)); > > + const int nreloc = 1024; > > + struct drm_i915_gem_relocation_entry *reloc; > > + struct drm_i915_gem_exec_object2 obj[2]; > > + struct drm_i915_gem_execbuffer2 exec; > > + unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH]; > > + uint32_t *batch; > > + int i; > > + > > + reloc = calloc(sizeof(*reloc), nreloc); > > + gem_userptr(i915, space, total, true, userptr_flags, &rhandle); > > + > > + memset(obj, 0, sizeof(obj)); > > + obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B; > > + obj[1].handle = gem_create(i915, sz); > > I didn't get your previous reply. This is the batch buffer right? So the > size needed is relating to the number of store dword + bbend you need to > emit, rather than sz, no? And nreloc is arbitrary subset of sz / > sizeof(uint32_t), right? > > So maybe: > > const int nreloc = sz / sizeof(uint32_t) / 16; /* arbitrary sub-size */ > ... > obj[1].handle = gem_create(i915, sizeof(uint32_t) + nreloc * > sizeof(one_store_dword_sz)); > > Or I am missing something? Just pointless. > > + obj[1].relocation_count = nreloc; > > + obj[1].relocs_ptr = to_user_pointer(reloc); > > + > > + batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE); > > + > > + memset(&exec, 0, sizeof(exec)); > > + exec.buffer_count = 2; > > + exec.buffers_ptr = to_user_pointer(obj); > > + if (gen < 6) > > + exec.flags |= I915_EXEC_SECURE; > > + > > + for_each_engine(i915, exec.flags) { > > + /* First tweak the backing store through the write */ > > + i = 0; > > + obj[0].handle = whandle; > > + for (int n = 0; n < nreloc; n++) { > > + uint64_t offset; > > + > > + reloc[n].target_handle = obj[0].handle; > > + reloc[n].delta = rand() % (sz / 4) * 4; > > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > > + reloc[n].presumed_offset = obj[0].offset; > > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; > > How about: > > __fill_reloc(&reloc, &obj[0], delta, offset) ? No. I really dislike functions whose only purpose is to obfuscate copying their arguments into the struct passed in. Because it just makes it harder to adapt in future, whereas I think this is quite clear as to how the batch is constructed. > > + > > + offset = reloc[n].presumed_offset + reloc[n].delta; > > + > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = offset >> 32; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc[n].offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > > + batch[++i] = rand(); > > + i++; > > + } > > + batch[i] = MI_BATCH_BUFFER_END; > > + igt_assert(i * sizeof(uint32_t) < sz); > > + > > + gem_execbuf(i915, &exec); > > + gem_sync(i915, obj[0].handle); > > + SHA1(pages, sz, ref); > > + > > + igt_assert(memcmp(ref, orig, sizeof(ref))); > > + memcpy(orig, ref, sizeof(orig)); > > + > > + /* Now try the same through the read-only handle */ > > + i = 0; > > + obj[0].handle = rhandle; > > + for (int n = 0; n < nreloc; n++) { > > + uint64_t offset; > > + > > + reloc[n].target_handle = obj[0].handle; > > + reloc[n].delta = rand() % (total / 4) * 4; > > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > > + reloc[n].presumed_offset = obj[0].offset; > > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; > > + > > + offset = reloc[n].presumed_offset + reloc[n].delta; > > + > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = offset >> 32; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc[n].offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > > + batch[++i] = rand(); > > + i++; > > + } > > + batch[i] = MI_BATCH_BUFFER_END; > > + > > + gem_execbuf(i915, &exec); > > + gem_sync(i915, obj[0].handle); > > + SHA1(pages, sz, result); > > + > > + /* > > + * As the writes into the read-only GPU bo should fail, > > + * the SHA1 hash of the backing store should be > > + * unaffected. > > + */ > > + igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0); > > + } > > + > > + munmap(batch, sz); > > + gem_close(i915, obj[1].handle); > > + gem_close(i915, rhandle); > > + } > > + igt_waitchildren(); > > + > > + munmap(space, total); > > + munmap(pages, sz); > > +} > > + > > +static jmp_buf sigjmp; > > +static void sigjmp_handler(int sig) > > +{ > > + siglongjmp(sigjmp, sig); > > +} > > + > > +static void test_readonly_mmap(int i915) > > +{ > > + unsigned char original[SHA_DIGEST_LENGTH]; > > + unsigned char result[SHA_DIGEST_LENGTH]; > > + uint32_t handle; > > + uint32_t sz; > > + void *pages; > > + void *ptr; > > + int sig; > > + > > + /* > > + * A quick check to ensure that we cannot circumvent the > > + * read-only nature of our memory by creating a GTT mmap into > > + * the pages. Imagine receiving a readonly SHM segment from > > + * another process, or a readonly file mmap, it must remain readonly > > + * on the GPU as well. > > + */ > > + > > + igt_require(igt_setup_clflush()); > > + > > + sz = 16 << 12; > > + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); > > + igt_assert(pages != MAP_FAILED); > > + > > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); > > + gem_set_caching(i915, handle, 0); > > + > > + memset(pages, 0xa5, sz); > > + igt_clflush_range(pages, sz); > > Please add comment saying why it is needed. Hmm, is it not obvious from context? > > + SHA1(pages, sz, original); > > + > > + ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE); > > + igt_assert(ptr == NULL); > > + > > + ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ); > > + gem_close(i915, handle); > > + > > + /* Check that a write into the GTT readonly map fails */ > > + if (!(sig = sigsetjmp(sigjmp, 1))) { > > + signal(SIGBUS, sigjmp_handler); > > + signal(SIGSEGV, sigjmp_handler); > > + memset(ptr, 0x5a, sz); > > + igt_assert(0); > > + } > > + igt_assert_eq(sig, SIGSEGV); > > + > > + /* Check that we disallow removing the readonly protection */ > > + igt_assert(mprotect(ptr, sz, PROT_WRITE)); > > + if (!(sig = sigsetjmp(sigjmp, 1))) { > > Continuing from previous reply - there is no longjmp so I don't know who > will jump here. Maybe it is just me since I am not familiar with the > facility but I still have a feeling comment on high level setup here is > warranted. Look at sigjmp_handler. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx