The ioctl can be used to request allocation of marks with large size and attach them to an object, even if another mark already exists for the group on the marked object. These large marks serve no function other than testing direct reclaim in the context of mark allocation. Setting the value to 0 restores normal mark allocation. FAN_MARK_REMOVE refers to the first mark of the group on an object, so the number of FAN_MARK_REMOVE calls need to match the number of large marks on the object in order to remove all marks from the object or use FAN_MARK_FLUSH to remove all marks of that object type. Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> --- fs/notify/fanotify/fanotify.c | 5 +++- fs/notify/fanotify/fanotify_user.c | 42 +++++++++++++++++++++++++++--- include/linux/fsnotify_backend.h | 2 ++ include/uapi/linux/fanotify.h | 4 +++ 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 985e995d2a39..02990a6b1b65 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -1075,7 +1075,10 @@ static void fanotify_freeing_mark(struct fsnotify_mark *mark, static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) { - kmem_cache_free(fanotify_mark_cache, fsn_mark); + if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_KMALLOC) + kfree(fsn_mark); + else + kmem_cache_free(fanotify_mark_cache, fsn_mark); } const struct fsnotify_ops fanotify_fsnotify_ops = { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2c65038da4ce..a3539bd8e443 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -928,6 +928,16 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar spin_unlock(&group->notification_lock); ret = put_user(send_len, (int __user *) p); break; + case FAN_IOC_SET_MARK_PAGE_ORDER: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + mutex_lock(&group->mark_mutex); + group->fanotify_data.mark_page_order = (unsigned int)arg; + pr_info("fanotify: set mark size page order to %u", + group->fanotify_data.mark_page_order); + ret = 0; + mutex_unlock(&group->mark_mutex); + break; } return ret; @@ -1150,6 +1160,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, __kernel_fsid_t *fsid) { struct ucounts *ucounts = group->fanotify_data.ucounts; + unsigned int order = group->fanotify_data.mark_page_order; struct fsnotify_mark *mark; int ret; @@ -1162,7 +1173,21 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) return ERR_PTR(-ENOSPC); - mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); + /* + * If requested to test direct reclaim in mark allocation context, + * start by trying to allocate requested page order per mark and + * fall back to allocation size that is likely to trigger direct + * reclaim but not too large to trigger compaction. + */ + if (order) { + mark = kmalloc(PAGE_SIZE << order, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + if (!mark && order > PAGE_ALLOC_COSTLY_ORDER) + mark = kmalloc(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + } else { + mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); + } if (!mark) { ret = -ENOMEM; goto out_dec_ucounts; @@ -1171,6 +1196,15 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_init_mark(mark, group); if (fan_flags & FAN_MARK_EVICTABLE) mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF; + /* + * Allow adding multiple large marks per object for testing. + * FAN_MARK_REMOVE refers to the first mark of the group, so one + * FAN_MARK_REMOVE is needed for every added large mark (or use + * FAN_MARK_FLUSH to remove all marks). + */ + if (order) + mark->flags |= FSNOTIFY_MARK_FLAG_KMALLOC | + FSNOTIFY_MARK_FLAG_ALLOW_DUPS; ret = fsnotify_add_mark_locked(mark, connp, obj_type, fsid); if (ret) { @@ -1201,11 +1235,13 @@ static int fanotify_add_mark(struct fsnotify_group *group, __u32 mask, unsigned int flags, __kernel_fsid_t *fsid) { - struct fsnotify_mark *fsn_mark; + struct fsnotify_mark *fsn_mark = NULL; int ret = 0; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_mark(connp, group); + /* Allow adding multiple large marks per object for testing */ + if (!group->fanotify_data.mark_page_order) + fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, connp, obj_type, flags, fsid); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index df58439a86fa..8220cf560c28 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -250,6 +250,7 @@ struct fsnotify_group { int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; mempool_t error_events_pool; + unsigned int mark_page_order; /* for testing only */ } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; @@ -528,6 +529,7 @@ struct fsnotify_mark { #define FSNOTIFY_MARK_FLAG_ALLOW_DUPS 0x0040 #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 +#define FSNOTIFY_MARK_FLAG_KMALLOC 0x0400 unsigned int flags; /* flags [mark->lock] */ }; diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index f1f89132d60e..49cdc9008bf2 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -3,6 +3,7 @@ #define _UAPI_LINUX_FANOTIFY_H #include <linux/types.h> +#include <linux/ioctl.h> /* the following events that user-space can register for */ #define FAN_ACCESS 0x00000001 /* File was accessed */ @@ -206,4 +207,7 @@ struct fanotify_response { (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ (long)(meta)->event_len <= (long)(len)) +/* Only for testing. Not useful otherwise */ +#define FAN_IOC_SET_MARK_PAGE_ORDER _IOW(0xfa, 1, long) + #endif /* _UAPI_LINUX_FANOTIFY_H */ -- 2.25.1