Re: [PATCH 1/3] mm: Introduce revoke_mappings.

Pekka Enberg <penberg@xxxxxxxxxx> · Mon, 27 Sep 2010 13:49:57 +0300

> Subject: [PATCH 1/3] mm: Introduce revoke_mappings.
>
> When the backing store of a file becomes inaccessible we need a function
> to remove that file from the page tables and arrange for page faults
> to trigger SIGBUS.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxxxxxxxx>

> +static void revoke_vma(struct vm_area_struct *old)
> +{
> +	/* Atomically replace a vma with an identical one that returns
> +	 * VM_FAULT_SIGBUS to every mmap request.
> +	 *
> +	 * This function must be called with the mm->mmap semaphore held.
> +	 */
> +	unsigned long start, end, len, pgoff, vm_flags;
> +	struct vm_area_struct *new;
> +	struct mm_struct *mm;
> +	struct file *file;
> +
> +	file  = revoked_filp;
> +	mm    = old->vm_mm;
> +	start = old->vm_start;
> +	end   = old->vm_end;
> +	len   = end - start;
> +	pgoff = old->vm_pgoff;
> +
> +	/* Preserve user visble vm_flags. */
> +	vm_flags = VM_SHARED | VM_MAYSHARE | (old->vm_flags & REVOKED_VM_FLAGS);
> +
> +	/* If kmem_cache_zalloc fails return and ultimately try again */
> +	new = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
> +	if (!new)
> +		goto out;
> +
> +	/* I am freeing exactly one vma so munmap should never fail.
> +	 * If munmap fails return and ultimately try again.
> +	 */
> +	if (unlikely(do_munmap(mm, start, len)))
> +		goto fail;
> +
> +	INIT_LIST_HEAD(&new->anon_vma_chain);
> +	new->vm_mm    = mm;
> +	new->vm_start = start;
> +	new->vm_end   = end;
> +	new->vm_flags = vm_flags;
> +	new->vm_page_prot = vm_get_page_prot(vm_flags);
> +	new->vm_pgoff = pgoff;
> +	new->vm_file  = file;
> +	get_file(file);
> +	new->vm_ops   = &revoked_vm_ops;
> +
> +	/* Since the area was just umapped there is no excuse for
> +	 * insert_vm_struct to fail.
> +	 *
> +	 * If insert_vm_struct fails we will cause a SIGSEGV instead
> +	 * a SIGBUS.  A shame but not the end of the world.

Can we simply fix up the old vma to avoid kmem_cache_zalloc() and
insert_vm_struct altogether? We're protected by ->mmap_sem so that shouldn't be
a problem?

> +	 */
> +	if (unlikely(insert_vm_struct(mm, new)))
> +		goto fail;
> +
> +	mm->total_vm += len >> PAGE_SHIFT;
> +
> +	perf_event_mmap(new);
> +
> +	return;
> +fail:
> +	kmem_cache_free(vm_area_cachep, new);
> +	WARN_ONCE(1, "%s failed\n", __func__);

Why don't we just propagate errors such as -ENOMEM to the callers? It seems
pointless to try to retry the operation at this level.

> +out:
> +	return;
> +}
> +
> +static bool revoke_mapping(struct address_space *mapping, struct mm_struct *mm,
> +			   unsigned long addr)
> +{
> +	/* Returns true if the locks were dropped */
> +	struct vm_area_struct *vma;
> +
> +	/*
> +	 * Drop i_mmap_lock and grab the mm sempahore so I can call

s/sempahore/semaphore/

> +	 * revoke_vma.
> +	 */
> +	if (!atomic_inc_not_zero(&mm->mm_users))
> +		return false;
> +	spin_unlock(&mapping->i_mmap_lock);
> +	down_write(&mm->mmap_sem);
> +
> +	/* There was a vma at mm, addr that needed to be revoked.
> +	 * Look and see if there is still a vma there that needs
> +	 * to be revoked.
> +	 */
> +	vma = find_vma(mm, addr);

Why aren't we checking for NULL vma here? AFAICT, there's a tiny window between
dropping ->i_mmap_lock and grabbing ->mmap_sem where the vma might have been
unmapped.

> +	if (vma->vm_file->f_mapping == mapping)
> +		revoke_vma(vma);
> +
> +	up_write(&mm->mmap_sem);
> +	mmput(mm);
> +	spin_lock(&mapping->i_mmap_lock);
> +	return true;
> +}
> +
> +void revoke_mappings(struct address_space *mapping)
> +{
> +	/* Make any access to previously mapped pages trigger a SIGBUS,
> +	 * and stop calling vm_ops methods.
> +	 *
> +	 * When revoke_mappings returns invocations of vm_ops->close
> +	 * may still be in progress, but no invocations of any other
> +	 * vm_ops methods will be.
> +	 */
> +	struct vm_area_struct *vma;
> +	struct prio_tree_iter iter;
> +
> +	spin_lock(&mapping->i_mmap_lock);
> +
> +restart_tree:
> +	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) {
> +		if (revoke_mapping(mapping, vma->vm_mm, vma->vm_start))
> +			goto restart_tree;
> +	}
> +
> +restart_list:
> +	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) {
> +		if (revoke_mapping(mapping, vma->vm_mm, vma->vm_start))
> +			goto restart_list;
> +	}
> +

What prevents a process from remapping the file after we've done revoking the
vma prio tree? Shouldn't we always restart from the top?

Also, don't we need spin_needbreak() on ->i_mmap_lock and cond_resched()
somewhere here like we do in mm/memory.c, for example?

> +	spin_unlock(&mapping->i_mmap_lock);
> +}
> +EXPORT_SYMBOL_GPL(revoke_mappings);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxxx  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>