On Mon, Jun 14, 2021 at 10:31 PM Rustam Kovhaev <rkovhaev@xxxxxxxxx> wrote: > > hello Catalin, Andrew! > > while troubleshooting a false positive syzbot kmemleak report i have > noticed an interesting behavior in kmemleak and i wonder whether it is > behavior by design and should be documented, or maybe something to > improve. > apologies if some of the questions do not make sense, i am still going > through kmemleak code.. > > a) kmemleak scans struct page (kmemleak.c:1462), but it does not scan > the actual contents (page_address(page)) of the page. > if we allocate an object with kmalloc(), then allocate page with > alloc_page(), and if we put kmalloc pointer somewhere inside that page, > kmemleak will report kmalloc pointer as a false positive. > should we improve kmemleak and make it scan page contents? > or will this bring too many false negatives? Hi Rustam, Nice debugging! I assume lots of pages are allocated for slab and we don't want to scan the whole page if only a few slab objects are alive on the page. However alloc_pages() can be called by end kernel code as well. I grepped for any kmemleak annotations around existing calls to alloc_pages, but did not find any... Does it require an explicit kmemleak_alloc() after allocating the page and kmemleak_free () before freeing the page? If there are more than one use case for this, I guess we could add some GFP flag for this maybe. > b) when kmemleak object gets created (kmemleak.c:598) it gets checksum > of 0, by the time user requests kmemleak "scan" via debugfs the pointer > will be most likely changed to some value by the kernel and during > first scan kmemleak won't report the object as orphan even if it did not > find any reference to it, because it will execute update_checksum() and > after that will proceed to updating object->count (kmemleak.c:1502). > and so the user will have to initiate a second "scan" via debugfs and > only then kmemleak will produce the report. > should we document this? > > below i am attaching a simplified reproducer for the false positive > kmemleak report (a). > i could have done it in the module, but i found it to be easier and > faster to test when done in a syscall, so that i did not have to > modprobe/modprobe -r. > > tyvm! > > --- > arch/x86/entry/syscalls/syscall_64.tbl | 1 + > include/linux/syscalls.h | 1 + > mm/Makefile | 2 +- > mm/kmemleak_test.c | 45 ++++++++++++++++++++++++++ > 4 files changed, 48 insertions(+), 1 deletion(-) > create mode 100644 mm/kmemleak_test.c > > diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl > index ce18119ea0d0..da967a87eb78 100644 > --- a/arch/x86/entry/syscalls/syscall_64.tbl > +++ b/arch/x86/entry/syscalls/syscall_64.tbl > @@ -343,6 +343,7 @@ > 332 common statx sys_statx > 333 common io_pgetevents sys_io_pgetevents > 334 common rseq sys_rseq > +335 common kmemleak_test sys_kmemleak_test > # don't use numbers 387 through 423, add new calls after the last > # 'common' entry > 424 common pidfd_send_signal sys_pidfd_send_signal > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index 050511e8f1f8..0602308aabf4 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -1029,6 +1029,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, > unsigned mask, struct statx __user *buffer); > asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, > int flags, uint32_t sig); > +asmlinkage long sys_kmemleak_test() > asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); > asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, > int to_dfd, const char __user *to_path, > diff --git a/mm/Makefile b/mm/Makefile > index bf71e295e9f6..878783838fa1 100644 > --- a/mm/Makefile > +++ b/mm/Makefile > @@ -97,7 +97,7 @@ obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o > obj-$(CONFIG_GUP_TEST) += gup_test.o > obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o > obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o > -obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o > +obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o kmemleak_test.o > obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o > obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o > obj-$(CONFIG_PAGE_OWNER) += page_owner.o > diff --git a/mm/kmemleak_test.c b/mm/kmemleak_test.c > new file mode 100644 > index 000000000000..828246e20b7f > --- /dev/null > +++ b/mm/kmemleak_test.c > @@ -0,0 +1,45 @@ > +// SPDX-License-Identifier: GPL-2.0+ > + > +#include <linux/sched.h> > +#include <linux/syscalls.h> > +#include <linux/types.h> > +#include <linux/mm.h> > + > +struct kmemleak_check { > + unsigned long canary; > + struct work_struct work; > + struct page **pages; > +}; > + > +static void work_func(struct work_struct *work) > +{ > + struct page **pages; > + struct kmemleak_check *ptr; > + > + set_current_state(TASK_INTERRUPTIBLE); > + schedule_timeout(3600*HZ); > + > + ptr = container_of(work, struct kmemleak_check, work); > + pages = ptr->pages; > + __free_page(pages[0]); > + kvfree(pages); > +} > + > +SYSCALL_DEFINE0(kmemleak_test) > +{ > + struct page **pages, *page; > + struct kmemleak_check *ptr; > + > + pages = kzalloc(sizeof(*pages), GFP_KERNEL); > + page = alloc_page(GFP_KERNEL); > + pages[0] = page; > + ptr = page_address(page); > + ptr->canary = 0x00FF00FF00FF00FF; > + ptr->pages = pages; > + pr_info("DEBUG: pages %px page %px ptr %px\n", pages, page, ptr); > + > + INIT_WORK(&ptr->work, work_func); > + schedule_work(&ptr->work); > + > + return 0; > +} > -- > 2.30.2 >