On Mon, Jan 04, 2016 at 02:34:42PM -0500, Chris Metcalf wrote:
> diff --git a/kernel/isolation.c b/kernel/isolation.c
> new file mode 100644
> index 000000000000..68a9f7457bc0
> --- /dev/null
> +++ b/kernel/isolation.c
> @@ -0,0 +1,105 @@
> +/*
> + * linux/kernel/isolation.c
> + *
> + * Implementation for task isolation.
> + *
> + * Distributed under GPLv2.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/swap.h>
> +#include <linux/vmstat.h>
> +#include <linux/isolation.h>
> +#include <linux/syscalls.h>
> +#include "time/tick-sched.h"
> +
> +cpumask_var_t task_isolation_map;
> +
> +/*
> + * Isolation requires both nohz and isolcpus support from the scheduler.
> + * We provide a boot flag that enables both for now, and which we can
> + * add other functionality to over time if needed.  Note that just
> + * specifying "nohz_full=... isolcpus=..." does not enable task isolation.
> + */
> +static int __init task_isolation_setup(char *str)
> +{
> +        alloc_bootmem_cpumask_var(&task_isolation_map);
> +        if (cpulist_parse(str, task_isolation_map) < 0) {
> +                pr_warn("task_isolation: Incorrect cpumask '%s'\n", str);
> +                return 1;
> +        }
> +
> +        alloc_bootmem_cpumask_var(&cpu_isolated_map);
> +        cpumask_copy(cpu_isolated_map, task_isolation_map);
> +
> +        alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
> +        cpumask_copy(tick_nohz_full_mask, task_isolation_map);
> +        tick_nohz_full_running = true;

How about calling tick_nohz_full_setup() instead? I'd prefer that the
nohz_full implementation details stay in tick-sched.c.

Also, what happens if nohz_full= is given as well as task_isolation=?
Don't we risk a memory leak, and maybe break the requirement that
(nohz_full & task_isolation) == task_isolation?

> +
> +        return 1;
> +}
> +__setup("task_isolation=", task_isolation_setup);
> +
> +/*
> + * This routine controls whether we can enable task-isolation mode.
> + * The task must be affinitized to a single task_isolation core or we will
> + * return EINVAL.  Although the application could later re-affinitize
> + * to a housekeeping core and lose task isolation semantics, this
> + * initial test should catch 99% of bugs with task placement prior to
> + * enabling task isolation.
> + */
> +int task_isolation_set(unsigned int flags)
> +{
> +        if (cpumask_weight(tsk_cpus_allowed(current)) != 1 ||
> +            !task_isolation_possible(smp_processor_id()))
> +                return -EINVAL;
> +
> +        current->task_isolation_flags = flags;
> +        return 0;
> +}

What if the task's affinity is changed concurrently? Also, it seems that
preemption isn't disabled, so we can also migrate concurrently. I'm
surprised you haven't seen warnings from smp_processor_id().

We should also protect against changes to the task's affinity once
task_isolation_flags is set.
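An untested sketch of what pinning the CPU across the check could look
like (names follow the quoted patch; note this still doesn't close the
race with a later sched_setaffinity(), which needs separate handling):

int task_isolation_set(unsigned int flags)
{
        int ret = -EINVAL;

        /* Disable preemption so smp_processor_id() is stable here. */
        preempt_disable();
        if (cpumask_weight(tsk_cpus_allowed(current)) == 1 &&
            task_isolation_possible(smp_processor_id())) {
                current->task_isolation_flags = flags;
                ret = 0;
        }
        preempt_enable();

        return ret;
}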
> +
> +/*
> + * In task isolation mode we try to return to userspace only after
> + * attempting to make sure we won't be interrupted again.  To handle
> + * the periodic scheduler tick, we test to make sure that the tick is
> + * stopped, and if it isn't yet, we request a reschedule so that if
> + * another task needs to run to completion first, it can do so.
> + * Similarly, if any other subsystems require quiescing, we will need
> + * to do that before we return to userspace.
> + */
> +bool _task_isolation_ready(void)
> +{
> +        WARN_ON_ONCE(!irqs_disabled());
> +
> +        /* If we need to drain the LRU cache, we're not ready. */
> +        if (lru_add_drain_needed(smp_processor_id()))
> +                return false;
> +
> +        /* If vmstats need updating, we're not ready. */
> +        if (!vmstat_idle())
> +                return false;
> +
> +        /* Request rescheduling unless we are in full dynticks mode. */
> +        if (!tick_nohz_tick_stopped()) {
> +                set_tsk_need_resched(current);

I'm not sure doing this will help get the tick stopped.

> +                return false;
> +        }
> +
> +        return true;
> +}

Thanks!
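PS: going back to the boot-parameter question above, here is a rough,
untested sketch of what I have in mind. It assumes tick-sched.c exposes
a cpumask-based tick_nohz_full_setup() helper, which would also be the
natural place to handle tick_nohz_full_mask having already been
allocated by an earlier nohz_full=:

static int __init task_isolation_setup(char *str)
{
        alloc_bootmem_cpumask_var(&task_isolation_map);
        if (cpulist_parse(str, task_isolation_map) < 0) {
                pr_warn("task_isolation: Incorrect cpumask '%s'\n", str);
                return 1;
        }

        alloc_bootmem_cpumask_var(&cpu_isolated_map);
        cpumask_copy(cpu_isolated_map, task_isolation_map);

        /* Keep the nohz_full bookkeeping inside tick-sched.c. */
        tick_nohz_full_setup(task_isolation_map);

        return 1;
}
__setup("task_isolation=", task_isolation_setup);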