VMAs for shared addresses are hosted by a separate host mm. Copy the
original PTEs over from the donor process to the host mm so that the
PTEs are maintained independently of the donor process.

Signed-off-by: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/mshare.c        | 14 +++++---------
 3 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d9456d424202..78c22891a792 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1845,6 +1845,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int
 copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
 int follow_invalidate_pte(struct mm_struct *mm, unsigned long address,
 			  struct mmu_notifier_range *range, pte_t **ptepp,
 			  pmd_t **pmdpp, spinlock_t **ptlp);
diff --git a/mm/memory.c b/mm/memory.c
index e7c5bc6f8836..9010d68f053a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1234,6 +1234,54 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	return 0;
 }
 
+/*
+ * Copy PTEs for mshare'd pages.
+ * This code is based upon copy_page_range()
+ */
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
+{
+	pgd_t *src_pgd, *dst_pgd;
+	unsigned long next;
+	unsigned long addr = src_vma->vm_start;
+	unsigned long end = src_vma->vm_end;
+	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	struct mm_struct *src_mm = src_vma->vm_mm;
+	struct mmu_notifier_range range;
+	int ret = 0;
+
+	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+				0, src_vma, src_mm, addr, end);
+	mmu_notifier_invalidate_range_start(&range);
+	/*
+	 * Disabling preemption is not needed for the write side, as
+	 * the read side doesn't spin, but goes to the mmap_lock.
+	 *
+	 * Use the raw variant of the seqcount_t write API to avoid
+	 * lockdep complaining about preemptibility.
+	 */
+	mmap_assert_write_locked(src_mm);
+	raw_write_seqcount_begin(&src_mm->write_protect_seq);
+
+	dst_pgd = pgd_offset(dst_mm, addr);
+	src_pgd = pgd_offset(src_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(src_pgd))
+			continue;
+		if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd,
+					    addr, next))) {
+			ret = -ENOMEM;
+			break;
+		}
+	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+	raw_write_seqcount_end(&src_mm->write_protect_seq);
+	mmu_notifier_invalidate_range_end(&range);
+
+	return ret;
+}
+
 int
 copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 {
diff --git a/mm/mshare.c b/mm/mshare.c
index fba31f3c190f..a399234bf106 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -385,7 +385,6 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
 			 * Copy this vma over to host mm
 			 */
 			vma->vm_private_data = info;
-			vma->vm_mm = new_mm;
 			vma->vm_flags |= VM_SHARED_PT;
 			new_vma = vm_area_dup(vma);
 			if (!new_vma) {
@@ -394,6 +393,7 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
 				err = -ENOMEM;
 				goto free_info;
 			}
+			new_vma->vm_mm = new_mm;
 			err = insert_vm_struct(new_mm, new_vma);
 			if (err) {
 				mmap_write_unlock(new_mm);
@@ -402,17 +402,13 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
 				goto free_info;
 			}
 
+			/* Copy over current PTEs */
+			err = mshare_copy_ptes(new_vma, vma);
+			if (err != 0)
+				goto free_info;
 			vma = vma->vm_next;
 		}
-		/*
-		 * Copy over current PTEs
-		 */
-		myaddr = addr;
-		while (myaddr < new_mm->task_size) {
-			*pgd_offset(new_mm, myaddr) = *pgd_offset(old_mm, myaddr);
-			myaddr += PGDIR_SIZE;
-		}
 		/*
 		 * TODO: Free the corresponding page table in calling
 		 * process
 		 */
-- 
2.32.0
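
For reference, below is a minimal donor-side sketch (hypothetical, not
part of this patch) of how the mshare() syscall proposed by this series
might be driven from userspace, based on the SYSCALL_DEFINE5(mshare,
name, addr, len, oflag, mode) signature touched above. The syscall
number is a placeholder, and the alignment/size constraints of the real
API are not modeled.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_mshare
#define __NR_mshare 450		/* placeholder; no number is assigned upstream */
#endif

int main(void)
{
	size_t len = 2UL * 1024 * 1024;	/* real API alignment rules not modeled */
	void *addr;

	/* Donor sets up an ordinary mapping first. */
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * Donate [addr, addr + len) under the name "demo". With this
	 * patch applied, the kernel copies the donor's PTEs for the
	 * range into the host mm via mshare_copy_ptes(), so they stay
	 * valid independently of the donor's own page tables.
	 */
	if (syscall(__NR_mshare, "demo", (unsigned long)addr, len,
		    O_CREAT | O_RDWR, 0600) < 0) {
		perror("mshare");
		return 1;
	}
	return 0;
}

Note the design change the patch itself makes: the removed loop in
mm/mshare.c copied raw PGD entries from the caller's mm, which merely
aliased the donor's lower-level page tables. mshare_copy_ptes() instead
performs a proper per-VMA copy modeled on copy_page_range(), under
write_protect_seq and mmu notifier bracketing, so the host mm owns an
independent set of page tables for the shared range.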