Change the perf RLIMIT_MEMLOCK accounting to use VM_PINNED.

Because of the way VM_PINNED works (it hard-assumes the entire vma
length is accounted), we have to slightly change the semantics: we used
to add to the RLIMIT_MEMLOCK accounting only once we were over the
per-user limit; now we directly account the full buffer against both
limits.

XXX: anon_inode_inode->i_mapping doesn't have AS_UNEVICTABLE set,
should it?

Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Roland Dreier <roland@xxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
 kernel/events/core.c |   36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)
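[Note for reviewers reading this patch standalone: the hunks below use
mm_locked_pages(), which is introduced by the VM_PINNED patches earlier
in this series. A sketch of its assumed definition, not part of this
patch:]

	/* Assumed helper from the VM_PINNED series: total pages charged
	 * against RLIMIT_MEMLOCK for this mm. */
	static inline unsigned long mm_locked_pages(struct mm_struct *mm)
	{
		return mm->pinned_vm + mm->locked_vm;
	}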
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4059,13 +4059,12 @@ static const struct vm_operations_struct
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_event *event = file->private_data;
+	unsigned long locked, lock_limit, lock_extra;
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
-	unsigned long locked, lock_limit;
-	struct ring_buffer *rb;
 	unsigned long vma_size;
 	unsigned long nr_pages;
-	long user_extra, extra;
+	struct ring_buffer *rb;
 	int ret = 0, flags = 0;
 
 	/*
@@ -4117,26 +4116,22 @@ static int perf_mmap(struct file *file,
 		goto unlock;
 	}
 
-	user_extra = nr_pages + 1;
-	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
+	lock_extra = nr_pages + 1;
 
 	/*
 	 * Increase the limit linearly with more CPUs:
 	 */
+	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
 	user_lock_limit *= num_online_cpus();
 
-	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
-
-	extra = 0;
-	if (user_locked > user_lock_limit)
-		extra = user_locked - user_lock_limit;
+	user_locked = atomic_long_read(&user->locked_vm) + lock_extra;
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
-	locked = vma->vm_mm->pinned_vm + extra;
+	locked = mm_locked_pages(vma->vm_mm) + lock_extra;
 
-	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
-		!capable(CAP_IPC_LOCK)) {
+	if ((user_locked > user_lock_limit && locked > lock_limit) &&
+	    perf_paranoid_tracepoint_raw() && !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
 		goto unlock;
 	}
@@ -4146,7 +4141,7 @@ static int perf_mmap(struct file *file,
 	if (vma->vm_flags & VM_WRITE)
 		flags |= RING_BUFFER_WRITABLE;
 
-	rb = rb_alloc(nr_pages, 
+	rb = rb_alloc(nr_pages,
 		event->attr.watermark ? event->attr.wakeup_watermark : 0,
 		event->cpu, flags);
 
@@ -4156,11 +4151,9 @@ static int perf_mmap(struct file *file,
 	}
 
 	atomic_set(&rb->mmap_count, 1);
-	rb->mmap_locked = extra;
 	rb->mmap_user = get_current_user();
 
-	atomic_long_add(user_extra, &user->locked_vm);
-	vma->vm_mm->pinned_vm += extra;
+	atomic_long_add(lock_extra, &user->locked_vm);
 
 	ring_buffer_attach(event, rb);
 
@@ -4173,10 +4166,13 @@ static int perf_mmap(struct file *file,
 	mutex_unlock(&event->mmap_mutex);
 
 	/*
-	 * Since pinned accounting is per vm we cannot allow fork() to copy our
-	 * vma.
+	 * VM_PINNED - this memory is pinned as we need to write to it from
+	 *             pretty much any context and cannot page.
+	 * VM_DONTCOPY - don't share over fork()
+	 * VM_DONTEXPAND - it's not a stack
+	 * VM_DONTDUMP - ...
 	 */
-	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_flags |= VM_PINNED | VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;
 
 	return ret;
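[Illustration only, not part of the patch: a minimal standalone
userspace model of the semantic change described above. All page counts
and both helper functions are hypothetical, and the
perf_paranoid_tracepoint_raw()/CAP_IPC_LOCK escape hatches are left
out.]

	#include <stdio.h>

	/* Old semantics: only the pages above the per-user perf limit were
	 * charged to the mm, and only that charge was checked against
	 * RLIMIT_MEMLOCK. */
	static int old_may_mmap(unsigned long user_locked,
				unsigned long user_lock_limit,
				unsigned long mm_pinned,
				unsigned long lock_limit)
	{
		unsigned long extra = 0;

		if (user_locked > user_lock_limit)
			extra = user_locked - user_lock_limit;

		return mm_pinned + extra <= lock_limit;
	}

	/* New semantics: the whole buffer is charged to both the per-user
	 * and the per-mm accounting; the mmap() is refused only when both
	 * limits are exceeded. */
	static int new_may_mmap(unsigned long user_locked,
				unsigned long user_lock_limit,
				unsigned long mm_locked,
				unsigned long lock_limit)
	{
		return !(user_locked > user_lock_limit &&
			 mm_locked > lock_limit);
	}

	int main(void)
	{
		/* Hypothetical: per-user limit 128 pages, RLIMIT_MEMLOCK 64. */
		unsigned long buffer = 16 + 1;	/* lock_extra = nr_pages + 1 */

		/* Under the per-user limit, but the mm already carries more
		 * pinned pages than RLIMIT_MEMLOCK allows: */
		printf("old: %d\n", old_may_mmap(32 + buffer, 128, 70, 64));		/* 0: refused */
		printf("new: %d\n", new_may_mmap(32 + buffer, 128, 70 + buffer, 64));	/* 1: allowed */
		return 0;
	}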