AutoPage Migration - add /proc/<tgid>/migrate Add a "migrate" control file to the per-process /proc subdir to allow external triggering of task auto [==self] migration. When lazy auto-migration is enabled, this effectively resets all unmappable pages so that the next touch will cause a migrate on fault, if the page is remote from the faulting task. This allows one to "poke" a task externally to force "re-affinitization on next touch" independent of inter-node migration. On read, show the current value of the task's "migrate_pending". Nearly useless, but I wanted to avoid a "write-only" file. On write, if the value is non-zero, set migrate_pending to 1 and set the TIF_NOTIFY_RESUME thread info flag to cause the task to handle the pending migration. If the value is zero, clear migrate_pending -- also not very useful, but it falls out of the code. Don't bother to reset the thread info flag when clearing migrate_pending -- just being lazy. Leaving the flag set is a no-op as far as auto-migration is concerned, and it might have been set by something else. 
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx> fs/proc/base.c | 6 ++++ fs/proc/internal.h | 2 + fs/proc/task_mmu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+) Index: linux-2.6.36-mmotm-101103-1217/fs/proc/base.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/fs/proc/base.c +++ linux-2.6.36-mmotm-101103-1217/fs/proc/base.c @@ -2743,6 +2743,9 @@ static const struct pid_entry tgid_base_ REG("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_numa_maps_operations), +#ifdef CONFIG_AUTO_MIGRATION + REG("migrate", S_IRUGO|S_IWUSR, proc_migrate_operations), +#endif #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), @@ -3080,6 +3083,9 @@ static const struct pid_entry tid_base_s REG("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_numa_maps_operations), +#ifdef CONFIG_AUTO_MIGRATION + REG("migrate", S_IRUGO|S_IWUSR, proc_migrate_operations), +#endif #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), Index: linux-2.6.36-mmotm-101103-1217/fs/proc/internal.h =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/fs/proc/internal.h +++ linux-2.6.36-mmotm-101103-1217/fs/proc/internal.h @@ -59,6 +59,8 @@ extern const struct file_operations proc extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; +extern const struct file_operations proc_migrate_operations; + extern const struct inode_operations proc_net_inode_operations; void proc_init_inodecache(void); Index: linux-2.6.36-mmotm-101103-1217/fs/proc/task_mmu.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/fs/proc/task_mmu.c +++ 
linux-2.6.36-mmotm-101103-1217/fs/proc/task_mmu.c @@ -1016,4 +1016,74 @@ const struct file_operations proc_numa_m .llseek = seq_lseek, .release = seq_release_private, }; + +#ifdef CONFIG_AUTO_MIGRATION +/* + * Read returns the task's "migrate_pending" flag. A non-zero write + * (CAP_SYS_RESOURCE only) sets it and also sets TIF_NOTIFY_RESUME so + * the task handles it on next return to user space -- no later than + * next clock tick. A zero write just clears "migrate_pending". + */ +static ssize_t proc_migrate_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF]; + size_t len; + int migpend; + loff_t __ppos = *ppos; + + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + return -ESRCH; + migpend = task->migrate_pending; + put_task_struct(task); + + len = snprintf(buffer, sizeof(buffer), "%i\n", migpend); + if (__ppos >= len) + return 0; + if (count > len-__ppos) + count = len-__ppos; + if (copy_to_user(buf, buffer + __ppos, count)) + return -EFAULT; + *ppos = __ppos + count; + return count; +} + +static ssize_t proc_migrate_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF], *end; + int migpend; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + migpend = simple_strtol(buffer, &end, 0); + if (*end == '\n') + end++; + if (end - buffer == 0) + return -EIO; + + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + return -ESRCH; + task->migrate_pending = !!migpend; /* normalize to 0 or 1 */ + if (migpend) + set_tsk_thread_flag(task, TIF_NOTIFY_RESUME); + put_task_struct(task); + + return end - buffer; /* bytes consumed; may be less than count */ +} + +const struct file_operations proc_migrate_operations = { + .read = proc_migrate_read, + .write = proc_migrate_write, +}; +#endif #endif -- To unsubscribe from this list: send the line "unsubscribe linux-numa" in 
the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html