On Tue, Oct 20, 2015 at 04:36:02PM -0400, Chris Metcalf wrote: > diff --git a/kernel/isolation.c b/kernel/isolation.c > new file mode 100644 > index 000000000000..9a73235db0bb > --- /dev/null > +++ b/kernel/isolation.c > @@ -0,0 +1,78 @@ > +/* > + * linux/kernel/isolation.c > + * > + * Implementation for task isolation. > + * > + * Distributed under GPLv2. > + */ > + > +#include <linux/mm.h> > +#include <linux/swap.h> > +#include <linux/vmstat.h> > +#include <linux/isolation.h> > +#include <linux/syscalls.h> > +#include "time/tick-sched.h" > + > +/* > + * This routine controls whether we can enable task-isolation mode. > + * The task must be affinitized to a single nohz_full core or we will > + * return EINVAL. Although the application could later re-affinitize > + * to a housekeeping core and lose task isolation semantics, this > + * initial test should catch 99% of bugs with task placement prior to > + * enabling task isolation. > + */ > +int task_isolation_set(unsigned int flags) > +{ > + if (cpumask_weight(tsk_cpus_allowed(current)) != 1 || I think you'll have to make sure the task cannot be concurrently re-affined to more CPUs. This may involve setting task_isolation_flags under the runqueue lock and thus moving that tiny part to the scheduler code. And then we must forbid changing the affinity while the task has the isolation flag, or else deactivate the flag when the affinity changes. In any case this needs some synchronization. > + !tick_nohz_full_cpu(smp_processor_id())) > + return -EINVAL; > + > + current->task_isolation_flags = flags; > + return 0; > +} > + > +/* > + * In task isolation mode we try to return to userspace only after > + * attempting to make sure we won't be interrupted again. To handle > + * the periodic scheduler tick, we test to make sure that the tick is > + * stopped, and if it isn't yet, we request a reschedule so that if > + * another task needs to run to completion first, it can do so. 
> + * Similarly, if any other subsystems require quiescing, we will need > + * to do that before we return to userspace. > + */ > +bool _task_isolation_ready(void) > +{ > + WARN_ON_ONCE(!irqs_disabled()); > + > + /* If we need to drain the LRU cache, we're not ready. */ > + if (lru_add_drain_needed(smp_processor_id())) > + return false; > + > + /* If vmstats need updating, we're not ready. */ > + if (!vmstat_idle()) > + return false; > + > + /* If the tick is running, request rescheduling; we're not ready. */ > + if (!tick_nohz_tick_stopped()) { Note that this function tells whether the tick is in dynticks mode, which means the tick currently only runs on demand. But it's not necessarily completely stopped. I think we should rename that function and the field it refers to. > + set_tsk_need_resched(current); > + return false; > + } > + > + return true; > +} Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-api" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html