Hi, So as discussed, this is one possible prctl interface for task isolation. Is this something that is desired? If not, what would be the proper form for the interface? (The addition of a new capability, CAP_TASK_ISOLATION, for permission checks is still missing; it should be done in the next version.) Thanks. add prctl interface for task isolation Add a new extensible interface for task isolation, and allow userspace to quiesce the CPU. This means putting the system into a quiet state by completing all workqueue items, idling all subsystems that need it, and putting the CPU into NOHZ mode. Suggested-by: Christopher Lameter <cl@xxxxxxxxx> Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Index: linux-2.6-vmstat2/include/uapi/linux/prctl.h =================================================================== --- linux-2.6-vmstat2.orig/include/uapi/linux/prctl.h +++ linux-2.6-vmstat2/include/uapi/linux/prctl.h @@ -247,4 +247,10 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Task isolation control */ +#define PR_TASK_ISOLATION_FEATURES 59 +#define PR_TASK_ISOLATION_GET 60 +#define PR_TASK_ISOLATION_SET 61 +#define PR_TASK_ISOLATION_REQUEST 62 + #endif /* _LINUX_PRCTL_H */ Index: linux-2.6-vmstat2/kernel/sys.c =================================================================== --- linux-2.6-vmstat2.orig/kernel/sys.c +++ linux-2.6-vmstat2/kernel/sys.c @@ -58,6 +58,7 @@ #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/sched/cputime.h> +#include <linux/isolation.h> #include <linux/rcupdate.h> #include <linux/uidgid.h> #include <linux/cred.h> @@ -2530,6 +2531,25 @@ SYSCALL_DEFINE5(prctl, int, option, unsi error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER; break; + case PR_TASK_ISOLATION_FEATURES: { + struct isolation_features ifeat; + + memset(&ifeat, 0, sizeof(ifeat)); + + prctl_task_isolation_features(&ifeat); + if (copy_to_user((char __user *)arg2, &ifeat, sizeof(ifeat))) + return -EFAULT; + break; + } + case
PR_TASK_ISOLATION_SET: + error = prctl_task_isolation_set(arg2, arg3, arg4, arg5); + break; + case PR_TASK_ISOLATION_GET: + error = prctl_task_isolation_get(arg2, arg3, arg4, arg5); + break; + case PR_TASK_ISOLATION_REQUEST: + error = prctl_task_isolation_request(arg2, arg3, arg4, arg5); + break; default: error = -EINVAL; break; Index: linux-2.6-vmstat2/Documentation/userspace-api/task_isolation.rst =================================================================== --- /dev/null +++ linux-2.6-vmstat2/Documentation/userspace-api/task_isolation.rst @@ -0,0 +1,99 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +Task isolation CPU interface +============================ + +The kernel might perform a number of activities in the background, +on a given CPU, in the form of workqueues or interrupts. + +This interface allows userspace to indicate to the kernel when +it is running latency-critical code (and what the behaviour should be +for activities that would interrupt the CPU). + +This allows the system to take preventive measures to avoid +deferred actions and create an OS-noise-free environment for +the application. + +The task isolation mode is a bitmap specifying which individual +features the application desires to be enabled. + +Each individual feature can be configured via + + prctl(PR_TASK_ISOLATION_SET, ISOL_F_featurename, params...) + +Enablement of the set of features is requested via + + prctl(PR_TASK_ISOLATION_REQUEST, ISOL_F_featurename, 0, 0, 0) + +Feature ISOL_F_featurename (both GET and SET) is supported if the flags +field of struct isolation_features has the ISOL_F_featurename bit +set (see "Example" section below). + +In summary, the usual flow is + + # Determine the supported features + prctl(PR_TASK_ISOLATION_FEATURES, ifeat, 0, 0, 0); + + # Configure the desired features, based on ifeat + if ((ifeat & ISOL_F_feature1) == ISOL_F_feature1) { + prctl(PR_TASK_ISOLATION_SET, ISOL_F_feature1, params...)
+ featuremask |= ISOL_F_feature1 + } + + if ((ifeat & ISOL_F_feature2) == ISOL_F_feature2) { + prctl(PR_TASK_ISOLATION_SET, ISOL_F_feature2, params...) + featuremask |= ISOL_F_feature2 + } + + ... + + # Enable isolation (features set in the bitmask), with each + # feature configured as above + prctl(PR_TASK_ISOLATION_REQUEST, featuremask, 0, 0, 0) + +Usage +===== +``PR_TASK_ISOLATION_FEATURES``: + Returns the supported features. Features are defined + in include/uapi/linux/isolation.h. + + Usage:: + + prctl(PR_TASK_ISOLATION_FEATURES, ifeat, 0, 0, 0); + + The 'ifeat' argument is a pointer to a struct isolation_features:: + + struct isolation_features { + __u32 flags; + __u32 pad[3]; + }; + + Where flags contains bits set for the features the kernel supports. + +``PR_TASK_ISOLATION_SET``: + Configures task isolation features. Each individual feature is + configured separately via + + prctl(PR_TASK_ISOLATION_SET, ISOL_F_featurename, params...) + +``PR_TASK_ISOLATION_GET``: + Retrieves the currently configured task isolation mode parameters + for feature ISOL_F_featurename (arg2). + + prctl(PR_TASK_ISOLATION_GET, ISOL_F_featurename, params...) + +``PR_TASK_ISOLATION_REQUEST``: + Enters task isolation, with the features in featuremask enabled. + This will quiesce any pending activity + on the CPU, and enable mode-specific configurations. + +Feature list +============ + +Example +======= + +The ``samples/task_isolation/`` directory contains a sample +application.
+ Index: linux-2.6-vmstat2/include/uapi/linux/isolation.h =================================================================== --- /dev/null +++ linux-2.6-vmstat2/include/uapi/linux/isolation.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _UAPI_LINUX_ISOL_H +#define _UAPI_LINUX_ISOL_H + +#include <linux/types.h> + +/* For PR_TASK_ISOLATION_FEATURES */ +struct isolation_features { + __u32 flags; + __u32 pad[3]; +}; + +/* Isolation features */ +#define ISOL_F_QUIESCE 0x1 + +#endif /* _UAPI_LINUX_ISOL_H */ + Index: linux-2.6-vmstat2/kernel/isolation.c =================================================================== --- /dev/null +++ linux-2.6-vmstat2/kernel/isolation.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of task isolation. + * + * Authors: + * Chris Metcalf <cmetcalf@xxxxxxxxxxxx> + * Alex Belits <abelits@xxxxxxxxxxx> + * Yuri Norov <ynorov@xxxxxxxxxxx> + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/isolation.h> +#include <linux/vmstat.h> + +/* Report the isolation features this kernel supports in ifeat->flags. */ +void prctl_task_isolation_features(struct isolation_features *ifeat) +{ + ifeat->flags = ISOL_F_QUIESCE; +} + +/* Placeholder: no per-feature parameters can be read back yet; returns 0. */ +int prctl_task_isolation_get(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return 0; +} + +/* Placeholder: no per-feature parameters can be configured yet; returns 0. */ +int prctl_task_isolation_set(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return 0; +} + +/* + * Enter task isolation with the features in the arg2 bitmask enabled. + * Unknown feature bits and non-zero arg3..arg5 are rejected with -EINVAL so + * that they can be given a meaning later without breaking existing users. + */ +int prctl_task_isolation_request(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + if ((arg2 & ~(unsigned long)ISOL_F_QUIESCE) || arg3 || arg4 || arg5) + return -EINVAL; + + if (!(arg2 & ISOL_F_QUIESCE)) + return 0; + + /* NOTE(review): the CPU id is sampled with raw_smp_processor_id(); confirm the task cannot migrate before the flush. */ + return user_quiet_vmstat(raw_smp_processor_id()); +} Index: linux-2.6-vmstat2/samples/task_isolation/task_isolation.c =================================================================== --- /dev/null +++ linux-2.6-vmstat2/samples/task_isolation/task_isolation.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/mman.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include
<fcntl.h> +#include <unistd.h> +#include <sys/prctl.h> +#include <linux/isolation.h> + +int main(void) /* sample: query the supported task isolation features and check for ISOL_F_QUIESCE */ +{ + int ret; + void *buf = malloc(4096); + struct isolation_features ifeat; + unsigned long fmask = 0; + + memset(&ifeat, 0, sizeof(ifeat)); /* memset() needs the address of the struct */ + + memset(buf, 1, 4096); + ret = mlock(buf, 4096); + if (ret) { + perror("mlock"); + exit(EXIT_FAILURE); + } + + ret = prctl(PR_TASK_ISOLATION_FEATURES, &ifeat, 0, 0, 0); /* fills ifeat.flags */ + if (ret == -1) { + perror("prctl"); + exit(EXIT_FAILURE); + } + +#ifdef ISOL_F_QUIESCE + /* enable ISOL_F_QUIESCE */ + if (!(ifeat.flags & ISOL_F_QUIESCE)) { + printf("ISOL_F_QUIESCE not set!\n"); + exit(EXIT_FAILURE); + } + fmask = fmask | ISOL_F_QUIESCE; +#endif + + /* busy loop; NOTE(review): ret is presumably 0 after a successful prctl(), so this never spins - confirm intent */ + while (ret != 0) + memset(buf, 0, 10); + + return 0; +} + Index: linux-2.6-vmstat2/include/linux/isolation.h =================================================================== --- /dev/null +++ linux-2.6-vmstat2/include/linux/isolation.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_ISOL_H +#define __LINUX_ISOL_H + +#include <uapi/linux/isolation.h> + +void prctl_task_isolation_features(struct isolation_features *ifeat); + +int prctl_task_isolation_get(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); + +int prctl_task_isolation_set(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); + +int prctl_task_isolation_request(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); +#endif /* __LINUX_ISOL_H */ Index: linux-2.6-vmstat2/kernel/Makefile =================================================================== --- linux-2.6-vmstat2.orig/kernel/Makefile +++ linux-2.6-vmstat2/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o smpboot.o ucount.o regset.o + async.o range.o smpboot.o ucount.o regset.o isolation.o obj-$(CONFIG_USERMODE_DRIVER) +=
usermode_driver.o obj-$(CONFIG_MODULES) += kmod.o Index: linux-2.6-vmstat2/include/linux/vmstat.h =================================================================== --- linux-2.6-vmstat2.orig/include/linux/vmstat.h +++ linux-2.6-vmstat2/include/linux/vmstat.h @@ -290,6 +290,7 @@ void refresh_zone_stat_thresholds(void); struct ctl_table; int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, loff_t *ppos); +int user_quiet_vmstat(int cpu); void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); Index: linux-2.6-vmstat2/mm/vmstat.c =================================================================== --- linux-2.6-vmstat2.orig/mm/vmstat.c +++ linux-2.6-vmstat2/mm/vmstat.c @@ -1936,6 +1936,16 @@ void quiet_vmstat(void) refresh_cpu_vm_stats(false); } +int user_quiet_vmstat(int cpu) +{ /* Userspace-requested quiesce: calls refresh_cpu_vm_stats(false) when need_update(cpu) reports true, then flushes the vmstat_work item of @cpu; always returns 0. */ + if (need_update(cpu) == true) + refresh_cpu_vm_stats(false); /* NOTE(review): takes no cpu argument - confirm it acts on @cpu if the caller can migrate */ + + flush_delayed_work(per_cpu_ptr(&vmstat_work, cpu)); + + return 0; +} + /* * Shepherd worker thread that checks the * differentials of processors that have their worker