On 11/30/2022 4:29 AM, Jason Gunthorpe wrote:
+
+/* pfn_reader_user is just the pin_user_pages() path */
+struct pfn_reader_user {
+ struct page **upages;
+ size_t upages_len;
+ unsigned long upages_start;
+ unsigned long upages_end;
+ unsigned int gup_flags;
+ /*
+ * 1 means mmget() and mmap_read_lock(), 0 means only mmget(), -1 is
+ * neither
+ */
+ int locked;
+};
+
+static void pfn_reader_user_init(struct pfn_reader_user *user,
+ struct iopt_pages *pages)
+{
+ user->upages = NULL;
+ user->upages_start = 0;
+ user->upages_end = 0;
+ user->locked = -1;
+
+ if (pages->writable) {
+ user->gup_flags = FOLL_LONGTERM | FOLL_WRITE;
+ } else {
+ /* Still need to break COWs on read */
+ user->gup_flags = FOLL_LONGTERM | FOLL_FORCE | FOLL_WRITE;
+ }
+}
+
+static void pfn_reader_user_destroy(struct pfn_reader_user *user,
+ struct iopt_pages *pages)
+{
+ if (user->locked != -1) {
+ if (user->locked)
+ mmap_read_unlock(pages->source_mm);
+ if (pages->source_mm != current->mm)
+ mmput(pages->source_mm);
+ user->locked = 0;
Set back to -1 is more aligned with the definition of the locked?
Although the value doesn't matter due to the end of lifecyle of
pfn_reader_user.
+ }
+
+ kfree(user->upages);
+ user->upages = NULL;
+}
+
+static int pfn_reader_user_pin(struct pfn_reader_user *user,
+ struct iopt_pages *pages,
+ unsigned long start_index,
+ unsigned long last_index)
+{
+ bool remote_mm = pages->source_mm != current->mm;
+ unsigned long npages;
+ uintptr_t uptr;
+ long rc;
+
+ if (!user->upages) {
+ /* All undone in pfn_reader_destroy() */
+ user->upages_len =
+ (last_index - start_index + 1) * sizeof(*user->upages);
+ user->upages = temp_kmalloc(&user->upages_len, NULL, 0);
+ if (!user->upages)
+ return -ENOMEM;
+ }
+
+ if (user->locked == -1) {
+ /*
+ * The majority of usages will run the map task within the mm
+ * providing the pages, so we can optimize into
+ * get_user_pages_fast()
+ */
+ if (remote_mm) {
+ if (!mmget_not_zero(pages->source_mm))
+ return -EFAULT;
+ }
+ user->locked = 0;
+ }
+
+ npages = min_t(unsigned long, last_index - start_index + 1,
+ user->upages_len / sizeof(*user->upages));
+
+ uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
+ if (!remote_mm)
+ rc = pin_user_pages_fast(uptr, npages, user->gup_flags,
+ user->upages);
+ else {
+ if (!user->locked) {
+ mmap_read_lock(pages->source_mm);
+ user->locked = 1;
+ }
+ /*
+ * FIXME: last NULL can be &pfns->locked once the GUP patch
+ * is merged.
+ */
+ rc = pin_user_pages_remote(pages->source_mm, uptr, npages,
+ user->gup_flags, user->upages, NULL,
+ NULL);
+ }
+ if (rc <= 0) {
+ if (WARN_ON(!rc))
+ return -EFAULT;
+ return rc;
+ }
+ iopt_pages_add_npinned(pages, rc);
+ user->upages_start = start_index;
+ user->upages_end = start_index + rc;
+ return 0;
+}
+
+/* This is the "modern" and faster accounting method used by io_uring */
+static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages)
+{
+ unsigned long lock_limit;
+ unsigned long cur_pages;
+ unsigned long new_pages;
+
+ lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >>
+ PAGE_SHIFT;
+ npages = pages->npinned - pages->last_npinned;
The passed in value of npages is not used?
+ do {
+ cur_pages = atomic_long_read(&pages->source_user->locked_vm);
+ new_pages = cur_pages + npages;
+ if (new_pages > lock_limit)
+ return -ENOMEM;
+ } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages,
+ new_pages) != cur_pages);
+ return 0;
+}
+