[PATCH v2 08/10] mm: kill vma flag VM_EXECUTABLE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently the kernel sets mm->exe_file during sys_execve() and then tracks the
number of vmas with the VM_EXECUTABLE flag in mm->num_exe_file_vmas. As soon as
this counter drops to zero, the kernel resets mm->exe_file to NULL. It also
resets mm->exe_file at the last mmput(), when mm->mm_users drops to zero.

A vma with the VM_EXECUTABLE flag appears after mapping a file with the
MAP_EXECUTABLE flag. Such vmas can appear only at sys_execve() or after vma
splitting, because sys_mmap ignores this flag. Usually the binfmt module sets
mm->exe_file and mmaps some executable vmas with this file; they hold
mm->exe_file while the task is running.

Comment from v2.6.25-6245-g925d1c4 ("procfs task exe symlink"),
where all this stuff was introduced:

> The kernel implements readlink of /proc/pid/exe by getting the file from
> the first executable VMA.  Then the path to the file is reconstructed and
> reported as the result.
>
> Because of the VMA walk the code is slightly different on nommu systems.
> This patch avoids separate /proc/pid/exe code on nommu systems.  Instead of
> walking the VMAs to find the first executable file-backed VMA we store a
> reference to the exec'd file in the mm_struct.
>
> That reference would prevent the filesystem holding the executable file
> from being unmounted even after unmapping the VMAs.  So we track the number
> of VM_EXECUTABLE VMAs and drop the new reference when the last one is
> unmapped.  This avoids pinning the mounted filesystem.

After this patch we track the number of VMAs with vma->vm_file == mm->exe_file
instead of VMAs with VM_EXECUTABLE. Behaviour is nearly the same: the kernel will
reset mm->exe_file as soon as the task unmaps its executable file.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxx>
Cc: Matt Helsley <matthltc@xxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Cyrill Gorcunov <gorcunov@xxxxxxxxxx>
---
 include/linux/mm.h   |    1 -
 include/linux/mman.h |    1 -
 mm/mmap.c            |   12 ++++++------
 mm/nommu.c           |   11 ++++++-----
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 553d134..8e82b79 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -88,7 +88,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
 
-#define VM_EXECUTABLE	0x00001000
 #define VM_LOCKED	0x00002000
 #define VM_IO           0x00004000	/* Memory mapped I/O or similar */
 
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 8b74e9b..77cec2f 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -86,7 +86,6 @@ calc_vm_flag_bits(unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
-	       _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
 }
 #endif /* __KERNEL__ */
diff --git a/mm/mmap.c b/mm/mmap.c
index 3d254ca..bc67ed7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -232,7 +232,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file) {
 		fput(vma->vm_file);
-		if (vma->vm_flags & VM_EXECUTABLE)
+		if (vma->vm_file == vma->vm_mm->exe_file)
 			removed_exe_file_vma(vma->vm_mm);
 	}
 	mpol_put(vma_policy(vma));
@@ -618,7 +618,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 	if (remove_next) {
 		if (file) {
 			fput(file);
-			if (next->vm_flags & VM_EXECUTABLE)
+			if (file == mm->exe_file)
 				removed_exe_file_vma(mm);
 		}
 		if (next->anon_vma)
@@ -1293,7 +1293,7 @@ munmap_back:
 		error = file->f_op->mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
-		if (vm_flags & VM_EXECUTABLE)
+		if (file == mm->exe_file)
 			added_exe_file_vma(mm);
 
 		/* Can addr have changed??
@@ -1971,7 +1971,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	if (new->vm_file) {
 		get_file(new->vm_file);
-		if (vma->vm_flags & VM_EXECUTABLE)
+		if (new->vm_file == mm->exe_file)
 			added_exe_file_vma(mm);
 	}
 
@@ -1992,7 +1992,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	if (new->vm_ops && new->vm_ops->close)
 		new->vm_ops->close(new);
 	if (new->vm_file) {
-		if (vma->vm_flags & VM_EXECUTABLE)
+		if (new->vm_file == mm->exe_file)
 			removed_exe_file_vma(mm);
 		fput(new->vm_file);
 	}
@@ -2379,7 +2379,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			new_vma->vm_pgoff = pgoff;
 			if (new_vma->vm_file) {
 				get_file(new_vma->vm_file);
-				if (vma->vm_flags & VM_EXECUTABLE)
+				if (new_vma->vm_file == mm->exe_file)
 					added_exe_file_vma(mm);
 			}
 			if (new_vma->vm_ops && new_vma->vm_ops->open)
diff --git a/mm/nommu.c b/mm/nommu.c
index afa0a15..db8da78 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -791,7 +791,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file) {
 		fput(vma->vm_file);
-		if (vma->vm_flags & VM_EXECUTABLE)
+		if (vma->vm_file == mm->exe_file)
 			removed_exe_file_vma(mm);
 	}
 	put_nommu_region(vma->vm_region);
@@ -1287,7 +1287,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 		get_file(file);
 		vma->vm_file = file;
 		get_file(file);
-		if (vm_flags & VM_EXECUTABLE) {
+		if (file == current->mm->exe_file) {
 			added_exe_file_vma(current->mm);
 			vma->vm_mm = current->mm;
 		}
@@ -1441,10 +1441,11 @@ error:
 	if (region->vm_file)
 		fput(region->vm_file);
 	kmem_cache_free(vm_region_jar, region);
-	if (vma->vm_file)
+	if (vma->vm_file) {
 		fput(vma->vm_file);
-	if (vma->vm_flags & VM_EXECUTABLE)
-		removed_exe_file_vma(vma->vm_mm);
+		if (vma->vm_file == vma->vm_mm->exe_file)
+			removed_exe_file_vma(vma->vm_mm);
+	}
 	kmem_cache_free(vm_area_cachep, vma);
 	kleave(" = %d", ret);
 	return ret;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]