On 6/19/18 3:02 AM, Peter Zijlstra wrote:
On Tue, Jun 19, 2018 at 07:34:16AM +0800, Yang Shi wrote:
diff --git a/mm/mmap.c b/mm/mmap.c
index fc41c05..e84f80c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2686,6 +2686,141 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return __split_vma(mm, vma, addr, new_below);
}
+/* Consider PUD size or 1GB mapping as large mapping */
+#ifdef HPAGE_PUD_SIZE
+#define LARGE_MAP_THRESH HPAGE_PUD_SIZE
+#else
+#define LARGE_MAP_THRESH (1 * 1024 * 1024 * 1024)
+#endif
+
+/* Unmap large mapping early with acquiring read mmap_sem */
+static int do_munmap_zap_early(struct mm_struct *mm, unsigned long start,
+ size_t len, struct list_head *uf)
+{
+ unsigned long end = 0;
+ struct vm_area_struct *vma = NULL, *prev, *last, *tmp;
+ bool success = false;
+ int ret = 0;
+
+ if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE - start)
+ return -EINVAL;
+
+ len = (PAGE_ALIGN(len));
+ if (len == 0)
+ return -EINVAL;
+
+ /* Just deal with uf in regular path */
+ if (unlikely(uf))
+ goto regular_path;
+
+ if (len >= LARGE_MAP_THRESH) {
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (!vma) {
+ up_read(&mm->mmap_sem);
+ return 0;
+ }
+
+ prev = vma->vm_prev;
+
+ end = start + len;
+ if (vma->vm_start > end) {
+ up_read(&mm->mmap_sem);
+ return 0;
+ }
+
+ if (start > vma->vm_start) {
+ int error;
+
+ if (end < vma->vm_end &&
+ mm->map_count > sysctl_max_map_count) {
+ up_read(&mm->mmap_sem);
+ return -ENOMEM;
+ }
+
+ error = __split_vma(mm, vma, start, 0);
+ if (error) {
+ up_read(&mm->mmap_sem);
+ return error;
+ }
+ prev = vma;
+ }
+
+ last = find_vma(mm, end);
+ if (last && end > last->vm_start) {
+ int error = __split_vma(mm, last, end, 1);
+
+ if (error) {
+ up_read(&mm->mmap_sem);
+ return error;
+ }
+ }
+ vma = prev ? prev->vm_next : mm->mmap;
Hold up, two things: you having to copy most of do_munmap() didn't seem
to suggest a helper function? And second, since when are we allowed to
Yes, they will be extracted into a helper function in the next version.
My bad, I don't think it is allowed. We could rework this to:
acquire write mmap_sem
vma lookup (split vmas)
release write mmap_sem
acquire read mmap_sem
zap pages
release read mmap_sem
I suppose this is safe, as Michal said before.
Thanks,
Yang
split VMAs under a read lock?